int P_zunmqr( const char *side, const char *trans, int M, int N, int K, void *A, int LDA, void *T, void *B, int LDB ) { PLASMA_enum s, t; int info; if (*side == 'L') { s = PlasmaLeft; } else { s = PlasmaRight; } if (*trans == 'C') { t = PlasmaConjTrans; } else { t = PlasmaNoTrans; } #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, T, B, LDB); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_zgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); info = PLASMA_zunmqr(s, t, M, N, K, A, LDA, descT, B, LDB); PLASMA_Desc_Destroy(&descT); #endif return(info); }
int P_dgeqrf( int M, int N, double *A, double *T ) { int info; #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_dgeqrf(M, N, A, M, T); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaRealDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); /* printf("MB=%d NB=%d BSIZ=%d LM=%d LN=%d M=%d N=%d MT=%d NT=%d\n", descT->mb, descT->nb, descT->bsiz, descT->lm, descT->ln, descT->m, descT->n, descT->mt, descT->nt); */ info = PLASMA_dgeqrf(M, N, A, M, descT); PLASMA_Desc_Destroy(&descT); #endif return(info); }
int P_zungqr( int M, int N, int K, void *A, int LDA, void *T, void *Q, int LDQ ) { int info; #if CHECK_VERSION_BEQ(2,4,5) info = PLASMA_zungqr(M, N, K, A, LDA, T, Q, LDQ); #else PLASMA_desc *descT; int NB, IB; int MT, NT; /* Get autotuned or set tile size; T matrix allocated with R */ PLASMA_Alloc_Workspace_dgeqrf(1, 1, &descT); PLASMA_Get(PLASMA_TILE_SIZE, &NB); PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &IB); PLASMA_Dealloc_Handle_Tile(&descT); MT = (M%NB==0) ? (M/NB) : (M/NB+1); NT = (N%NB==0) ? (N/NB) : (N/NB+1); // possibly allocate space for descT in R and keep it in qr object instead info = PLASMA_Desc_Create(&descT, T, PlasmaComplexDouble, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB); info = PLASMA_zungqr(M, N, K, A, LDA, descT, Q, LDQ); PLASMA_Desc_Destroy(&descT); #endif return(info); }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { double *AT; real_Double_t t; PLASMA_desc *descA; int nb, nb2, nt; int n = iparam[TIMING_N]; int check = iparam[TIMING_CHECK]; PLASMA_enum uplo = PlasmaLower; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (double *)malloc(nt*nt*nb2*sizeof(double)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } /* * Initialize Data * It's done in static to avoid having the same sequence than one * the function we want to trace */ PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n); PLASMA_dplgsy_Tile( (double)n, descA, 51 ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /* Save AT in lapack layout for check */ if ( check ) { } /* PLASMA DPOTRF / DTRTRI / DLAUUM */ /* * Example of the different way to combine several asynchonous calls */ { #if defined(TRACE_BY_SEQUENCE) PLASMA_sequence *sequence[3]; PLASMA_request request[3] = { PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER }; PLASMA_Sequence_Create(&sequence[0]); PLASMA_Sequence_Create(&sequence[1]); PLASMA_Sequence_Create(&sequence[2]); t = -cWtime(); #if defined(POTRI_SYNC) PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]); PLASMA_Sequence_Wait(sequence[0]); PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]); PLASMA_Sequence_Wait(sequence[1]); PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]); PLASMA_Sequence_Wait(sequence[2]); #else PLASMA_dpotrf_Tile_Async(uplo, descA, sequence[0], &request[0]); PLASMA_dtrtri_Tile_Async(uplo, PlasmaNonUnit, descA, sequence[1], &request[1]); PLASMA_dlauum_Tile_Async(uplo, descA, sequence[2], &request[2]); PLASMA_Sequence_Wait(sequence[0]); PLASMA_Sequence_Wait(sequence[1]); PLASMA_Sequence_Wait(sequence[2]); #endif t += cWtime(); PLASMA_Sequence_Destroy(sequence[0]); PLASMA_Sequence_Destroy(sequence[1]); PLASMA_Sequence_Destroy(sequence[2]); #else #if defined(POTRI_SYNC) t = -cWtime(); PLASMA_dpotrf_Tile(uplo, descA); PLASMA_dtrtri_Tile(uplo, PlasmaNonUnit, descA); PLASMA_dlauum_Tile(uplo, descA); t += cWtime(); #else /* Default: we use Asynchonous call with only one sequence */ PLASMA_sequence *sequence; PLASMA_request request[2] = { PLASMA_REQUEST_INITIALIZER, PLASMA_REQUEST_INITIALIZER }; t = -cWtime(); PLASMA_Sequence_Create(&sequence); PLASMA_dpotrf_Tile_Async(uplo, descA, sequence, &request[0]); PLASMA_dpotri_Tile_Async(uplo, descA, sequence, &request[1]); PLASMA_Sequence_Wait(sequence); t += cWtime(); PLASMA_Sequence_Destroy(sequence); #endif #endif *t_ = t; } /* Check the solution */ if ( check ) { dparam[TIMING_ANORM] = 0.0; dparam[TIMING_XNORM] = 0.0; dparam[TIMING_BNORM] = 0.0; dparam[TIMING_RES] = 0.0; } PLASMA_Desc_Destroy(&descA); PLASMA_Finalize(); free(AT); return 0; }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { PLASMA_Complex64_t *A = NULL, *AT, *b, *bT, *x; PLASMA_desc *descA, *descB, *descL; real_Double_t t; int *piv; int nb, nb2, nt; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (PLASMA_Complex64_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n); LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT); /* Allocate Workspace */ PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv); /* Save AT in lapack layout for check */ if ( check ) { A = (PLASMA_Complex64_t *)malloc(lda*n *sizeof(PLASMA_Complex64_t)); PLASMA_Tile_to_Lapack(descA, (void*)A, n); } t = -cWtime(); PLASMA_zgetrf_incpiv_Tile( descA, descL, piv ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { b = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t)); bT = (PLASMA_Complex64_t *)malloc(nt*nb2 *sizeof(PLASMA_Complex64_t)); x = (PLASMA_Complex64_t *)malloc(ldb*nrhs *sizeof(PLASMA_Complex64_t)); LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b); PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs); PLASMA_Lapack_to_Tile((void*)b, n, descB); PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB ); PLASMA_Tile_to_Lapack(descB, (void*)x, n); dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); PLASMA_Desc_Destroy(&descB); free( A ); free( b ); free( bT ); free( x ); } /* Deallocate Workspace */ PLASMA_Dealloc_Handle_Tile(&descL); PLASMA_Desc_Destroy(&descA); free( AT ); free( piv ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { float *AT, *BT, *CT; float *A = NULL, *B = NULL, *C1 = NULL, *C2 = NULL; float alpha, beta; PLASMA_desc *descA, *descB, *descC; real_Double_t t; int nb, nb2, nt; int n = iparam[TIMING_N]; int check = iparam[TIMING_CHECK]; int lda = n; /* Allocate Data */ /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } */ /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); AT = (float *)malloc(nt*nt*nb2*sizeof(float)); BT = (float *)malloc(nt*nt*nb2*sizeof(float)); CT = (float *)malloc(nt*nt*nb2*sizeof(float)); /* Check if unable to allocate memory */ if ( (!AT) || (!BT) || (!CT) ) { printf("Out of Memory \n "); exit(0); } #if defined(PLASMA_CUDA) cudaHostRegister(AT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable); cudaHostRegister(BT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable); cudaHostRegister(CT, nt*nt*nb2*sizeof(float), cudaHostRegisterPortable); #endif /* Initialiaze Data */ LAPACKE_slarnv_work(1, ISEED, 1, &alpha); LAPACKE_slarnv_work(1, ISEED, 1, &beta); LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, AT); LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, BT); LAPACKE_slarnv_work(1, ISEED, nt*nt*nb2, CT); /* Initialize AT and bT for Symmetric Positif Matrix */ PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); PLASMA_Desc_Create(&descB, BT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); PLASMA_Desc_Create(&descC, CT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); if (check) { C2 = (float *)malloc(n*lda*sizeof(float)); PLASMA_Tile_to_Lapack(descC, (void*)C2, n); } #if defined(PLASMA_CUDA) core_cublas_init(); #endif t = -cWtime(); PLASMA_sgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC ); t += cWtime(); *t_ = t; /* Check the solution */ if (check) { A = (float *)malloc(n*lda*sizeof(float)); PLASMA_Tile_to_Lapack(descA, (void*)A, n); free(AT); B = (float *)malloc(n*lda*sizeof(float)); PLASMA_Tile_to_Lapack(descB, (void*)B, n); free(BT); C1 = (float *)malloc(n*lda*sizeof(float)); PLASMA_Tile_to_Lapack(descC, (void*)C1, n); free(CT); dparam[TIMING_RES] = s_check_gemm( PlasmaNoTrans, PlasmaNoTrans, n, n, n, alpha, A, lda, B, lda, beta, C1, C2, lda, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); free(C2); } else { free( AT ); free( BT ); free( CT ); } PLASMA_Desc_Destroy(&descA); PLASMA_Desc_Destroy(&descB); PLASMA_Desc_Destroy(&descC); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { double *A = NULL, *AT, *b = NULL, *bT, *x; PLASMA_desc *descA, *descB, *descT; real_Double_t t; int nb, nb2, nt; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); #if defined(PLASMA_CUDA) core_cublas_init(); #endif /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (double *)malloc(nt*nt*nb2*sizeof(double)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } #if defined(PLASMA_CUDA) cudaHostRegister((void*)AT, nt*nt*nb2*sizeof(double), cudaHostRegisterPortable); #endif /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaRealDouble, nb, nb, nb*nb, n, n, 0, 0, n, n); LAPACKE_dlarnv_work(1, ISEED, nt*nt*nb2, AT); /* Allocate Workspace */ PLASMA_Alloc_Workspace_dgels_Tile(n, n, &descT); #if defined(PLASMA_CUDA) cudaHostRegister((void*)descT->mat, descT->lm*descT->ln*sizeof(double), cudaHostRegisterPortable); #endif /* Save AT in lapack layout for check */ if ( check ) { A = (double *)malloc(lda*n *sizeof(double)); PLASMA_Tile_to_Lapack(descA, (void*)A, n); } t = -cWtime(); PLASMA_dgeqrf_Tile( descA, descT ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { b = (double *)malloc(ldb*nrhs *sizeof(double)); bT = (double *)malloc(nt*nb2 *sizeof(double)); x = (double *)malloc(ldb*nrhs *sizeof(double)); LAPACKE_dlarnv_work(1, ISEED, nt*nb2, bT); PLASMA_Desc_Create(&descB, bT, PlasmaRealDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs); PLASMA_Tile_to_Lapack(descB, (void*)b, n); PLASMA_dgeqrs_Tile( descA, descT, descB ); PLASMA_Tile_to_Lapack(descB, (void*)x, n); dparam[TIMING_RES] = d_check_solution(n, n, nrhs, A, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); PLASMA_Desc_Destroy(&descB); free( A ); free( b ); free( bT ); free( x ); } /* Allocate Workspace */ PLASMA_Dealloc_Handle_Tile(&descT); PLASMA_Desc_Destroy(&descA); free( AT ); PLASMA_Finalize(); #if defined(PLASMA_CUDA) #endif return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { float *AT, *Q = NULL; float *W; PLASMA_desc *descA = NULL; PLASMA_desc *descQ = NULL; PLASMA_desc *descT = NULL; real_Double_t t; int nb, nb2, nt; int n = iparam[TIMING_N]; int check = iparam[TIMING_CHECK]; int lda = n; int uplo = PlasmaUpper; int vec = PlasmaNoVec; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (float *)malloc(lda*n*sizeof(float)); W = (float *)malloc(n*sizeof(float)); if (vec == PlasmaVec){ Q = (float *)malloc(lda*n*sizeof(float)); if ( (!Q) ) { printf("Out of Memory -Q-\n "); return -2; } } /* Check if unable to allocate memory */ if ( (!AT) || (!W) ) { printf("Out of Memory -\n "); return -2; } /* Initialize Data */ PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); PLASMA_splgsy_Tile((float)0.0, descA, 51 ); if (vec == PlasmaVec) PLASMA_Desc_Create(&descQ, Q, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); /* Save AT and bT in lapack layout for check */ if ( check ) { } /* Allocate Workspace */ PLASMA_Alloc_Workspace_ssyev(n, n, &descT); t = -cWtime(); PLASMA_ssyev_Tile( vec, uplo, descA, W, descT, descQ ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { } /* DeAllocate Workspace */ PLASMA_Dealloc_Handle_Tile(&descT); PLASMA_Desc_Destroy(&descA); if (vec == PlasmaVec) { PLASMA_Desc_Destroy(&descQ); free( Q ); } free( AT ); free( W ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { float *AT, *bT, *x; float *A = NULL; float *b = NULL; PLASMA_desc *descA, *descB; real_Double_t t; int *piv; int n = iparam[TIMING_N]; int nb = iparam[TIMING_NB]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ /* Allocate Data */ AT = (float *)malloc(lda*n *sizeof(float)); bT = (float *)malloc(ldb*nrhs*sizeof(float)); piv = (int *)malloc( n*sizeof(int)); /* Check if unable to allocate memory */ if ( (!AT) || (!bT) || (!piv) ) { printf("Out of Memory \n "); return -1; } /* Initialize AT and bT for Symmetric Positif Matrix */ PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs); LAPACKE_slarnv_work(1, ISEED, lda*n, AT); LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, bT); /* Save AT and bT in lapack layout for check */ if ( check ) { A = (float *)malloc(lda*n *sizeof(float)); b = (float *)malloc(ldb*nrhs*sizeof(float)); PLASMA_sTile_to_Lapack(descA, (void*)A, lda); PLASMA_sTile_to_Lapack(descB, (void*)b, ldb); } t = -cWtime(); PLASMA_sgesv_Tile( descA, piv, descB ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { x = (float *)malloc(ldb*nrhs *sizeof(float)); PLASMA_sTile_to_Lapack(descB, (void*)x, n); dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); free(A); free(b); free(x); } PLASMA_Desc_Destroy(&descA); PLASMA_Desc_Destroy(&descB); free( AT ); free( bT ); free( piv ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; PLASMA_Complex32_t *A, *AT, *A2 = NULL; PLASMA_desc *descA; real_Double_t t; int *ipiv, *ipiv2 = NULL; int i; int nb = iparam[TIMING_NB]; int m = iparam[TIMING_N]; int n = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* Allocate Data */ A = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); AT = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( ( !AT ) || (! A) ) { printf("Out of Memory \n "); return -1; } /* Initialiaze Data */ LAPACKE_clarnv_work(1, ISEED, lda*n, A); /* for(i=0; i<n; i++) { */ /* A[i*lda+i] += (float)m; */ /* } */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, m, n); PLASMA_cLapack_to_Tile((void*)A, lda, descA); /* Allocate Workspace */ ipiv = (int *)malloc( n*sizeof(int) ); /* Save AT in lapack layout for check */ if ( check ) { A2 = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); ipiv2 = (int *)malloc( n*sizeof(int) ); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda); LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 ); } plasma = plasma_context_self(); PLASMA_Sequence_Create(&sequence); QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] ); plasma_dynamic_spawn(); CORE_cgetrf_rectil_init(); t = -cWtime(); QUARK_CORE_cgetrf_rectil(plasma->quark, &task_flags, *descA, AT, descA->mb*descA->nb, ipiv, sequence, &request, 0, 0, iparam[TIMING_THRDNBR]); PLASMA_Sequence_Wait(sequence); t += cWtime(); *t_ = t; PLASMA_Sequence_Destroy(sequence); /* Check the solution */ if ( check ) { float *work = (float *)malloc(max(m,n)*sizeof(float)); PLASMA_cTile_to_Lapack(descA, (void*)A, lda); /* Check ipiv */ for(i=0; i<n; i++) { if( ipiv[i] != ipiv2[i] ) { fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n", i, ipiv[i], i, crealf(A[ i * lda + i ]), i, ipiv2[i], i, crealf(A2[ i * lda + i ])); break; } } dparam[TIMING_ANORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A, lda, work); dparam[TIMING_XNORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); dparam[TIMING_BNORM] = 0.0; CORE_caxpy( m, n, -1.0, A, lda, A2, lda); dparam[TIMING_RES] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); free( A2 ); free( ipiv2 ); free( work ); } /* Deallocate Workspace */ PLASMA_Desc_Destroy(&descA); free( A ); free( AT ); free( ipiv ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, _PREC *dparam, real_Double_t *t_) { PLASMA_Complex32_t *A = NULL, *AT; PLASMA_desc *descA; real_Double_t t; int n = iparam[TIMING_N]; int nb = iparam[TIMING_NB]; int check = iparam[TIMING_CHECK]; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } */ n = ((n % nb) == 0) ? (n / nb) * nb : ((n / nb) + 1) * nb ; dparam[TIMING_ANORM] = (_PREC)n; /* Allocate Data */ AT = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( (!AT) ) { printf("Out of Memory \n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); LAPACKE_clarnv_work(1, ISEED, n*n, AT); /* Save A and b */ if (check) { A = (PLASMA_Complex32_t *)malloc(n*n*sizeof(PLASMA_Complex32_t)); LAPACKE_clacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), n, n, AT, n, A, n); } t = -cWtime(); PLASMA_Lapack_to_Tile( (void *)A, n, descA); t += cWtime(); *t_ = t; /* Check the solution */ if (check) { dparam[TIMING_RES] = (_PREC)c_check_conversion(n, n, n, 1, nb, nb, A, AT, map_CM, map_CCRB); free(A); } PLASMA_Desc_Destroy(&descA); free( AT ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { PLASMA_Complex64_t *AT; PLASMA_desc *descA, *descT; real_Double_t t; int nb; int M = iparam[TIMING_N]; int N = iparam[TIMING_M]; //int N = M/nrhs; //RUN WITH NRHS = 10 or 20 (ALSO USED IN TIMING.C) int lda = M; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ nb = iparam[TIMING_NB]; /* Householder mode */ //PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_FLAT_HOUSEHOLDER); PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER); PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4); /* Allocate Data */ AT = (PLASMA_Complex64_t *)malloc(lda*N*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, M, N, 0, 0, M, N); LAPACKE_zlarnv_work(1, ISEED, lda*N, AT); /* Allocate Workspace */ PLASMA_Alloc_Workspace_zgels_Tile(M, N, &descT); t = -cWtime(); PLASMA_zgeqrf_Tile( descA, descT ); t += cWtime(); *t_ = t; /* Allocate Workspace */ PLASMA_Dealloc_Handle_Tile(&descT); PLASMA_Desc_Destroy(&descA); free( AT ); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { float *A = NULL, *AT, *b = NULL, *bT, *x; real_Double_t t; PLASMA_desc *descA, *descB; int nb, nb2, nt; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (float *)malloc(nt*nt*nb2*sizeof(float)); /* Check if unable to allocate memory */ if ( !AT ){ printf("Out of Memory \n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaRealFloat, nb, nb, nb*nb, n, n, 0, 0, n, n); PLASMA_splgsy_Tile((float)n, descA, 51 ); /* Save AT in lapack layout for check */ if ( check ) { A = (float *)malloc(lda*n *sizeof(float)); PLASMA_Tile_to_Lapack(descA, (void*)A, n); } /* PLASMA SPOSV */ t = -cWtime(); PLASMA_spotrf_Tile(PlasmaUpper, descA); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { b = (float *)malloc(ldb*nrhs *sizeof(float)); bT = (float *)malloc(nt*nb2 *sizeof(float)); x = (float *)malloc(ldb*nrhs *sizeof(float)); LAPACKE_slarnv_work(1, ISEED, nt*nb2, bT); PLASMA_Desc_Create(&descB, bT, PlasmaRealFloat, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs); PLASMA_Tile_to_Lapack(descB, (void*)b, n); PLASMA_spotrs_Tile( PlasmaUpper, descA, descB ); PLASMA_Tile_to_Lapack(descB, (void*)x, n); dparam[TIMING_RES] = s_check_solution(n, n, nrhs, A, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); PLASMA_Desc_Destroy(&descB); free( A ); free( b ); free( bT ); free( x ); } PLASMA_Desc_Destroy(&descA); free(AT); PLASMA_Finalize(); return 0; }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { PLASMA_Complex32_t *AT, *BT, *Q = NULL; float *W; PLASMA_desc *descA, *descB, *descQ, *descT; real_Double_t t; int nb, nb2, nt; int n = iparam[TIMING_N]; int check = iparam[TIMING_CHECK]; int lda = n; int itype = 1; int vec = PlasmaNoVec; int uplo = PlasmaUpper; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); // if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); /* else */ /* PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); */ /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ nb = iparam[TIMING_NB]; nb2 = nb * nb; nt = n / nb + ((n % nb == 0) ? 0 : 1); /* Allocate Data */ AT = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t)); BT = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t)); W = (float *)malloc(n*sizeof(float)); if (vec == PlasmaVec){ Q = (PLASMA_Complex32_t *)malloc(nt*nt*nb2*sizeof(PLASMA_Complex32_t)); if ( (!Q) ) { printf("Out of Memory -Q-\n "); exit(0); } } /* Check if unable to allocate memory */ if ( (!AT) || (!BT) || (!W) ) { printf("Out of Memory -\n "); exit(0); } /* Initialiaze Data */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); PLASMA_cplghe_Tile((float)0.0, descA, 51 ); PLASMA_Desc_Create(&descB, BT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); PLASMA_cplghe_Tile((float)n, descB, 51 ); PLASMA_Desc_Create(&descQ, Q, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, n, n); /* Save AT and bT in lapack layout for check */ if ( check ) { } /* Allocate Workspace */ PLASMA_Alloc_Workspace_chegv(n, n, &descT); t = -cWtime(); PLASMA_chegv_Tile( itype, vec, uplo, descA, descB, W, descT, descQ ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check ) { } /* DeAllocate Workspace */ PLASMA_Dealloc_Handle_Tile(&descT); PLASMA_Desc_Destroy(&descA); PLASMA_Desc_Destroy(&descB); PLASMA_Desc_Destroy(&descQ); if (vec == PlasmaVec) free( Q ); free( AT ); free( W ); PLASMA_Finalize(); return 0; }