/*
 * Timing driver for PLASMA_zgemm_Tile with both operands non-transposed.
 *
 * iparam : integer run parameters. Read directly for IPARAM_LDB and
 *          IPARAM_LDC, and handed to PASTE_CODE_IPARAM_LOCALS.
 * dparam : when checking is enabled, receives the value returned by
 *          z_check_gemm (IPARAM_RES slot) and the three norms it reports
 *          (IPARAM_ANORM, IPARAM_BNORM, IPARAM_XNORM slots).
 * t_     : not referenced directly in this function.
 *          NOTE(review): presumably written by START_TIMING/STOP_TIMING - confirm.
 *
 * Always returns 0.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) {
    PLASMA_Complex64_t alpha, beta;
    /* NOTE(review): M, N, K, LDA, LDB, LDC and check are not declared in this
     * function; presumably this macro declares them from iparam - confirm. */
    PASTE_CODE_IPARAM_LOCALS( iparam );

    /* Raise the requested leading dimensions to at least the row counts of B (K) and C (M). */
    LDB = max(K, iparam[IPARAM_LDB]);
    LDC = max(M, iparam[IPARAM_LDC]);

    /* Allocate Data */
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDA, M, K );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descB, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDB, K, N );
    PASTE_CODE_ALLOCATE_MATRIX_TILE( descC, 1, PLASMA_Complex64_t, PlasmaComplexDouble, LDC, M, N );

    /* Initialize Data: each matrix gets its own fixed seed, the two scalars are drawn from ISEED */
    PLASMA_zplrnt_Tile( descA, 5373 );
    PLASMA_zplrnt_Tile( descB, 7672 );
    PLASMA_zplrnt_Tile( descC, 6387 );
    LAPACKE_zlarnv_work(1, ISEED, 1, &alpha);
    LAPACKE_zlarnv_work(1, ISEED, 1, &beta);

    /* Save C for check: C2 must be captured before the product overwrites descC */
    PASTE_TILE_TO_LAPACK( descC, C2, check, PLASMA_Complex64_t, LDC, N );

    START_TIMING();
    PLASMA_zgemm_Tile( PlasmaNoTrans, PlasmaNoTrans, alpha, descA, descB, beta, descC );
    STOP_TIMING();

    /* Check the solution */
    if (check) {
        /* Convert the operands and the result to LAPACK layout for the reference check. */
        PASTE_TILE_TO_LAPACK( descA, A, check, PLASMA_Complex64_t, LDA, K );
        PASTE_TILE_TO_LAPACK( descB, B, check, PLASMA_Complex64_t, LDB, N );
        PASTE_TILE_TO_LAPACK( descC, C, check, PLASMA_Complex64_t, LDC, N );

        dparam[IPARAM_RES] = z_check_gemm( PlasmaNoTrans, PlasmaNoTrans, M, N, K, alpha, A, LDA, B, LDB, beta, C, C2, LDC, &(dparam[IPARAM_ANORM]), &(dparam[IPARAM_BNORM]), &(dparam[IPARAM_XNORM]));

        free(A);
        free(B);
        free(C);
        free(C2);
    }

    PASTE_CODE_FREE_MATRIX( descA );
    PASTE_CODE_FREE_MATRIX( descB );
    PASTE_CODE_FREE_MATRIX( descC );
    return 0;
}
/*
 * Timing driver for PLASMA_zgetrf_incpiv_Tile (tile LU factorization with
 * incremental pivoting) on an n-by-n random matrix.
 *
 * iparam : integer run parameters (TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_NB, TIMING_IB).
 * dparam : when TIMING_CHECK is set, receives the z_check_solution result
 *          (TIMING_RES) and the norms it reports (TIMING_ANORM,
 *          TIMING_BNORM, TIMING_XNORM).
 * t_     : receives the elapsed time of the factorization only.
 *
 * Returns 0. An allocation failure prints "Out of Memory" and terminates
 * the process with exit(0), as the original AT check did.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A = NULL, *AT, *b, *bT, *x;
    PLASMA_desc *descA, *descB, *descL;
    real_Double_t t;
    int *piv;
    int nb, nb2, nt, ntrhs;
    size_t a_tile_elems, b_tile_elems;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: tile sizes are taken from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    nb  = iparam[TIMING_NB];
    nb2 = nb * nb;
    /* Number of tiles needed to cover n rows/columns and nrhs columns. */
    nt    = n    / nb + ((n    % nb == 0) ? 0 : 1);
    ntrhs = nrhs / nb + ((nrhs % nb == 0) ? 0 : 1);

    /* Element counts are computed in size_t so large problems do not
     * overflow int before reaching malloc. */
    a_tile_elems = (size_t)nt * (size_t)nt    * (size_t)nb2;
    b_tile_elems = (size_t)nt * (size_t)ntrhs * (size_t)nb2;

    /* Allocate Data */
    AT = (PLASMA_Complex64_t *)malloc(a_tile_elems*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( !AT ){
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Data */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, n, n, 0, 0, n, n);
    LAPACKE_zlarnv_work(1, ISEED, nt*nt*nb2, AT);

    /* Allocate Workspace */
    PLASMA_Alloc_Workspace_zgesv_incpiv_Tile(n, &descL, &piv);

    /* Save AT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc((size_t)lda*(size_t)n*sizeof(PLASMA_Complex64_t));
        if ( !A ){
            printf("Out of Memory \n ");
            exit(0);
        }
        PLASMA_Tile_to_Lapack(descA, (void*)A, n);
    }

    t = -cWtime();
    PLASMA_zgetrf_incpiv_Tile( descA, descL, piv );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        b  = (PLASMA_Complex64_t *)malloc((size_t)ldb*(size_t)nrhs*sizeof(PLASMA_Complex64_t));
        /* The tile copy of the right-hand sides spans nt x ntrhs tiles.
         * The previous size (nt tiles) was only large enough for nrhs <= nb. */
        bT = (PLASMA_Complex64_t *)malloc(b_tile_elems*sizeof(PLASMA_Complex64_t));
        x  = (PLASMA_Complex64_t *)malloc((size_t)ldb*(size_t)nrhs*sizeof(PLASMA_Complex64_t));
        if ( (!b) || (!bT) || (!x) ) {
            printf("Out of Memory \n ");
            exit(0);
        }

        LAPACKE_zlarnv_work(1, ISEED, n*nrhs, b);
        PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, n, nrhs, 0, 0, n, nrhs);
        PLASMA_Lapack_to_Tile((void*)b, n, descB);

        /* Solve with the factorization just timed, then bring X back to LAPACK layout. */
        PLASMA_zgetrs_incpiv_Tile( descA, descL, piv, descB );
        PLASMA_Tile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));

        PLASMA_Desc_Destroy(&descB);
        free( A );
        free( b );
        free( bT );
        free( x );
    }

    /* Deallocate Workspace */
    PLASMA_Dealloc_Handle_Tile(&descL);
    PLASMA_Desc_Destroy(&descA);
    free( AT );
    free( piv );
    PLASMA_Finalize();

    return 0;
}
int testing_zgesv_incpiv(int argc, char **argv) { /* Check for valid arguments*/ if (argc != 4){ USAGE("GESV_INCPIV", "N LDA NRHS LDB", " - N : the size of the matrix\n" " - LDA : leading dimension of the matrix A\n" " - NRHS : number of RHS\n" " - LDB : leading dimension of the matrix B\n"); return -1; } int N = atoi(argv[0]); int LDA = atoi(argv[1]); int NRHS = atoi(argv[2]); int LDB = atoi(argv[3]); double eps; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_Complex64_t *L; int *IPIV; /* Check if unable to allocate memory */ if ( (!A1) || (!A2)|| (!B1) || (!B2) ) { printf("Out of Memory \n "); return -2; } eps = BLAS_dfpinfo(blas_eps); /*---------------------------------------------------------- * TESTING ZGESV */ /* Initialize A1 and A2 Matrix */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA ZGESV */ PLASMA_Alloc_Workspace_zgesv_incpiv(N, &L, &IPIV); PLASMA_zgesv_incpiv(N, NRHS, A2, LDA, L, IPIV, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA INCPIV ZGESV ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if 
((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGESV ............... PASSED !\n"); printf("***************************************************\n"); } else{ printf("************************************************\n"); printf(" - TESTING INCPIV ZGESV ... FAILED !\n"); printf("************************************************\n"); } /*------------------------------------------------------------- * TESTING ZGETRF + ZGETRS */ /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Plasma routines */ PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); PLASMA_zgetrs_incpiv(PlasmaNoTrans, N, NRHS, A2, LDA, L, IPIV, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA ZGETRF + ZGETRS ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if ((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGETRF + ZGETRS ..... PASSED !\n"); printf("***************************************************\n"); } else{ printf("***************************************************\n"); printf(" - TESTING INCPIV ZGETRF + ZGETRS ... 
FAILED !\n"); printf("***************************************************\n"); } /*------------------------------------------------------------- * TESTING ZGETRF + ZTRSMPL + ZTRSM */ /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* PLASMA routines */ PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB); PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, 1.0, A2, LDA, B2, LDB); printf("\n"); printf("------ TESTS FOR PLASMA INCPIV ZGETRF + ZTRSMPL + ZTRSM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); if ((info_solution == 0)){ printf("***************************************************\n"); printf(" ---- TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... PASSED !\n"); printf("***************************************************\n"); } else{ printf("**************************************************\n"); printf(" - TESTING INCPIV ZGETRF + ZTRSMPL + ZTRSM ... FAILED !\n"); printf("**************************************************\n"); } free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); return 0; }
int testing_zher2k(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("HER2K", "alpha beta M N LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - N : number of columns and rows of matrix C and number of row of matrix A and B\n" " - K : number of columns of matrix A and B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); double beta = (double) atol(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int NKmax = max(N, K); double eps; int info_solution; int u, t; size_t LDAxK = LDA*NKmax; size_t LDBxK = LDB*NKmax; size_t LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxK*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxK*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Cinit) || (!Cfinal) ){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZHER2K ROUTINE ------- \n"); printf(" Size of the Matrix C %d by %d\n", N, K); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZHER2K */ /* Initialize A,B */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_zlarnv_work(IONE, 
ISEED, LDBxK, B); /* Initialize C */ PLASMA_zplghe( (double)0., N, C, LDC, 51 ); for (u=0; u<2; u++) { for (t=0; t<3; t++) { if (trans[t] == PlasmaTrans) continue; memcpy(Cinit, C, LDCxN*sizeof(PLASMA_Complex64_t)); memcpy(Cfinal, C, LDCxN*sizeof(PLASMA_Complex64_t)); /* PLASMA ZHER2K */ PLASMA_zher2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZHER2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZHER2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
/*
 * Timing driver for PLASMA_zposv (PlasmaUpper) on an n-by-n matrix produced
 * by PLASMA_zplghe and nrhs random right-hand sides.
 *
 * iparam : integer run parameters (TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_NB).
 * dparam : when TIMING_CHECK is set, receives the z_check_solution result
 *          (TIMING_RES) and the norms it reports (TIMING_ANORM,
 *          TIMING_BNORM, TIMING_XNORM).
 * t_     : receives the elapsed time of the PLASMA_zposv call only.
 *
 * Returns 0. An allocation failure prints "Out of Memory" and terminates
 * the process with exit(0).
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *A, *Acpy = NULL, *b = NULL, *x;
    real_Double_t t;
    int n     = iparam[TIMING_N];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    /* Byte counts are computed in size_t to avoid int overflow on large inputs. */
    size_t a_bytes = (size_t)lda*(size_t)n   *sizeof(PLASMA_Complex64_t);
    size_t b_bytes = (size_t)ldb*(size_t)nrhs*sizeof(PLASMA_Complex64_t);

    /* Allocate Data */
    A = (PLASMA_Complex64_t *)malloc(a_bytes);
    x = (PLASMA_Complex64_t *)malloc(b_bytes);

    /* Check if unable to allocate memory */
    if ( (!A) || (!x) ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: the tile size is taken from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] );

    /* Initialize Data */
    PLASMA_zplghe((double)n, n, A, lda, 51 );
    LAPACKE_zlarnv_work(1, ISEED, n*nrhs, x);

    /* Save A and b: PLASMA_zposv overwrites both A and x */
    if (check) {
        Acpy = (PLASMA_Complex64_t *)malloc(a_bytes);
        b    = (PLASMA_Complex64_t *)malloc(b_bytes);

        /* The copies are written to immediately, so they get the same
         * out-of-memory handling as A and x. */
        if ( (!Acpy) || (!b) ) {
            printf("Out of Memory \n ");
            exit(0);
        }

        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, n,    A, lda, Acpy, lda);
        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, x, ldb, b,    ldb);
    }

    /* PLASMA ZPOSV */
    t = -cWtime();
    PLASMA_zposv(PlasmaUpper, n, nrhs, A, lda, x, ldb);
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if (check) {
        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, Acpy, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
        free(Acpy);
        free(b);
    }

    free(A);
    free(x);

    PLASMA_Finalize();

    return 0;
}
/*
 * Timing driver for the QUARK task QUARK_CORE_zgetrf_reclap on an m-by-n
 * random matrix (m = TIMING_N, n = TIMING_NRHS).
 *
 * iparam : integer run parameters (TIMING_N, TIMING_NRHS, TIMING_CHECK,
 *          TIMING_THRDNBR, TIMING_NB, TIMING_IB).
 * dparam : when TIMING_CHECK is set, receives the max-norms of the PLASMA
 *          and LAPACK factors (TIMING_ANORM, TIMING_XNORM), 0.0 in
 *          TIMING_BNORM, and the max-norm of the result of CORE_zaxpy on
 *          the two factors (TIMING_RES).
 * t_     : receives the elapsed time from task insertion to sequence
 *          completion.
 *
 * Returns 0 on success and -1 when an allocation fails. PLASMA is
 * finalized and all buffers are released on every path.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    plasma_context_t *plasma;
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    PLASMA_Complex64_t *A = NULL, *A2 = NULL;
    real_Double_t t;
    int *ipiv = NULL, *ipiv2 = NULL;
    double *work = NULL;
    int i;
    int rc = 0;
    int m     = iparam[TIMING_N];
    int n     = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda   = m;
    /* Only the first min(m,n) pivots and diagonal entries are compared below. */
    int minmn = (m < n) ? m : n;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );

    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    A = (PLASMA_Complex64_t *)malloc((size_t)lda*(size_t)n*sizeof(PLASMA_Complex64_t));

    /* Check if unable to allocate memory */
    if ( ! A ) {
        printf("Out of Memory \n ");
        rc = -1;
        goto cleanup;
    }

    /* Initialize Data */
    LAPACKE_zlarnv_work(1, ISEED, lda*n, A);

    /* Allocate Workspace */
    ipiv = (int *)malloc( (size_t)n*sizeof(int) );
    if ( ! ipiv ) {
        printf("Out of Memory \n ");
        rc = -1;
        goto cleanup;
    }

    /* Save A in lapack layout for check, and factorize the copy with LAPACK as reference */
    if ( check ) {
        A2    = (PLASMA_Complex64_t *)malloc((size_t)lda*(size_t)n*sizeof(PLASMA_Complex64_t));
        ipiv2 = (int *)malloc( (size_t)n*sizeof(int) );
        if ( (! A2) || (! ipiv2) ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }

        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda);
        LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 );
    }

    plasma = plasma_context_self();
    PLASMA_Sequence_Create(&sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);
    QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] );

    plasma_dynamic_spawn();
    CORE_zgetrf_reclap_init();

    t = -cWtime();
    QUARK_CORE_zgetrf_reclap(plasma->quark, &task_flags,
                             m, n, n,
                             A, lda, ipiv,
                             sequence, &request,
                             0, 0,
                             iparam[TIMING_THRDNBR]);
    PLASMA_Sequence_Wait(sequence);
    t += cWtime();
    *t_ = t;

    PLASMA_Sequence_Destroy(sequence);

    /* Check the solution */
    if ( check ) {
        work = (double *)malloc((size_t)max(m,n)*sizeof(double));
        if ( ! work ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto cleanup;
        }

        /* Check ipiv: report the first mismatch only. An LU factorization
         * of an m-by-n matrix defines min(m,n) pivots, and A[i*lda+i] is a
         * diagonal entry only for i < min(m,n). */
        for(i=0; i<minmn; i++) {
            if( ipiv[i] != ipiv2[i] ) {
                fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n",
                        i, ipiv[i],  i, creal(A[  i * lda + i ]),
                        i, ipiv2[i], i, creal(A2[ i * lda + i ]));
                break;
            }
        }

        dparam[TIMING_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A, lda, work);
        dparam[TIMING_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                   m, n, A2, lda, work);
        dparam[TIMING_BNORM] = 0.0;

        /* Combine the two factors into A2 with a coefficient of -1.0, then take its max-norm. */
        CORE_zaxpy( m, n, -1.0, A, lda, A2, lda);

        dparam[TIMING_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm),
                                                 m, n, A2, lda, work);
    }

cleanup:
    /* free(NULL) is a no-op, so this is safe from every exit path. */
    free( work );
    free( A2 );
    free( ipiv2 );
    free( A );
    free( ipiv );
    PLASMA_Finalize();

    return rc;
}
int testing_zsymm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient \n" " - beta : beta coefficient \n" " - M : number of rows of matrices A and C \n" " - N : number of columns of matrices B and C \n" " - LDA : leading dimension of matrix A \n" " - LDB : leading dimension of matrix B \n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); PLASMA_Complex64_t beta = (PLASMA_Complex64_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int MNmax = max(M, N); double eps; int info_solution; int i, j, s, u; int LDAxM = LDA*MNmax; int LDBxN = LDB*N; int LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxM*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZSYMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZSYMM */ /* Initialize A */ PLASMA_zplgsy( (double)0., MNmax, A, LDA, 51 ); /* Initialize B */ LAPACKE_zlarnv_work(IONE, ISEED, 
LDBxN, B); /* Initialize C */ LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C); for (s=0; s<2; s++) { for (u=0; u<2; u++) { /* Initialize Cinit / Cfinal */ for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA ZSYMM */ PLASMA_zsymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
/*
 * Public entry point for zlarnv.
 *
 * Forwards idist, iseed, n and x unchanged to LAPACKE_zlarnv_work() and
 * returns whatever that call returns. No argument checking is done here.
 */
lapack_int LAPACKE_zlarnv( lapack_int idist, lapack_int* iseed, lapack_int n,
                           lapack_complex_double* x )
{
    lapack_int status;

    status = LAPACKE_zlarnv_work( idist, iseed, n, x );

    return status;
}
int main () { int cores = 2; int M = 10; int N = 15; int LDA = 10; int NRHS = 5; int LDB = 15; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); exit(0); } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_zgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for (i = 0; i < M; i++) for (j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_zgelqf(M, N, A2, LDA, T); /* Solve the problem */ info = PLASMA_zgelqs(M, N, NRHS, A2, LDA, T, B2, LDB); /* Check the solution */ info_solution = check_solution(M, N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in ZGELQS example ! \n"); else printf("-- Run of ZGELQS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(T); PLASMA_Finalize(); exit(0); }
/*
 * Timing driver for PLASMA_zgeqrf_Tile with the tree Householder mode
 * (Householder size 4) on a random matrix.
 *
 * iparam : integer run parameters (TIMING_N, TIMING_M, TIMING_THRDNBR,
 *          TIMING_SCHEDULER, TIMING_NB, TIMING_IB).
 * dparam : unused here; no result check is performed.
 * t_     : receives the elapsed time of the factorization only.
 *
 * Returns 0. If the matrix cannot be allocated the process prints
 * "Out of Memory" and calls exit(0).
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    /* Row count comes from TIMING_N and column count from TIMING_M.
     * NOTE(review): the index names look swapped - confirm against the caller. */
    const int rows = iparam[TIMING_N];
    const int cols = iparam[TIMING_M];
    const int ld   = rows;
    PLASMA_Complex64_t *tiles;
    PLASMA_desc *descA, *descT;
    real_Double_t elapsed;
    int tile_size;

    /* Bring up PLASMA and select the scheduler requested by the caller. */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    PLASMA_Set(PLASMA_SCHEDULING_MODE,
               iparam[TIMING_SCHEDULER] ? PLASMA_DYNAMIC_SCHEDULING
                                        : PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always off: block sizes are taken from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );
    tile_size = iparam[TIMING_NB];

    /* Householder mode */
    PLASMA_Set(PLASMA_HOUSEHOLDER_MODE, PLASMA_TREE_HOUSEHOLDER);
    PLASMA_Set(PLASMA_HOUSEHOLDER_SIZE, 4);

    /* Matrix storage */
    tiles = (PLASMA_Complex64_t *)malloc(ld*cols*sizeof(PLASMA_Complex64_t));
    if ( tiles == NULL ) {
        printf("Out of Memory \n ");
        exit(0);
    }

    /* Wrap the buffer in a descriptor and fill it with random values. */
    PLASMA_Desc_Create(&descA, tiles, PlasmaComplexDouble,
                       tile_size, tile_size, tile_size*tile_size,
                       rows, cols, 0, 0, rows, cols);
    LAPACKE_zlarnv_work(1, ISEED, ld*cols, tiles);

    /* Workspace for the T factors */
    PLASMA_Alloc_Workspace_zgels_Tile(rows, cols, &descT);

    /* Timed region: the factorization only. */
    elapsed = -cWtime();
    PLASMA_zgeqrf_Tile( descA, descT );
    elapsed += cWtime();
    *t_ = elapsed;

    /* Release workspace, descriptor and storage, then shut PLASMA down. */
    PLASMA_Dealloc_Handle_Tile(&descT);
    PLASMA_Desc_Destroy(&descA);
    free( tiles );
    PLASMA_Finalize();

    return 0;
}
int main () { int cores = 2; int N = 10; int LDA = 10; int NRHS = 5; int LDB = 10; int info; int info_solution; int i,j; int LDAxN = LDA*N; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A1)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*(sizeof*A2)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B1)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*(sizeof*B2)); PLASMA_desc *L; int *IPIV; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)){ printf("Out of Memory \n "); return EXIT_SUCCESS; } /*Plasma Initialize*/ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Initialize A1 and A2 Matrix */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; /* Allocate L and IPIV */ info = PLASMA_Alloc_Workspace_zgetrf_incpiv(N, N, &L, &IPIV); /* LU factorization of the matrix A */ info = PLASMA_zgetrf_incpiv(N, N, A2, LDA, L, IPIV); /* Solve the problem */ info = PLASMA_ztrsmpl(N, NRHS, A2, LDA, L, IPIV, B2, LDB); info = PLASMA_ztrsm(PlasmaLeft, PlasmaUpper, PlasmaNoTrans, PlasmaNonUnit, N, NRHS, (PLASMA_Complex64_t)1.0, A2, LDA, B2, LDB); /* Check the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB); if ((info_solution != 0)|(info != 0)) printf("-- Error in ZGETRS example ! \n"); else printf("-- Run of ZGETRS example successful ! \n"); free(A1); free(A2); free(B1); free(B2); free(IPIV); free(L); PLASMA_Finalize(); return EXIT_SUCCESS; }
int main () { int cores = 2; int M = 15; int N = 10; int LDA = 15; int K = min(M, N); int info; int info_ortho, info_factorization; int i,j; int LDAxN = LDA*N; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Q = (PLASMA_Complex64_t *)malloc(LDA*N*sizeof(PLASMA_Complex64_t)); PLASMA_desc *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!Q)){ printf("Out of Memory \n "); return EXIT_SUCCESS; } /* Plasma Initialization */ PLASMA_Init(cores); printf("-- PLASMA is initialized to run on %d cores. \n",cores); /* Allocate T */ PLASMA_Alloc_Workspace_zgeqrf(M, N, &T); /* Initialize A1 and A2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxN, A1); for (i = 0; i < M; i++) for (j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i] ; /* Factorization QR of the matrix A2 */ info = PLASMA_zgeqrf(M, N, A2, LDA, T); /* Building the economy-size Q */ memset((void*)Q, 0, LDA*N*sizeof(PLASMA_Complex64_t)); for (i = 0; i < K; i++) Q[LDA*i+i] = 1.0; PLASMA_zungqr(M, N, K, A2, LDA, T, Q, LDA); /* Check the orthogonality, factorization and the solution */ info_ortho = check_orthogonality(M, N, LDA, Q); info_factorization = check_factorization(M, N, A1, A2, LDA, Q); printf("--- info %d %d %d \n",info_factorization,info_ortho,info); if ((info_ortho != 0)|(info_factorization != 0)|(info != 0)) printf("-- Error in ZGEQRF example ! \n"); else printf("-- Run of ZGEQRF example successful ! \n"); free(A1); free(A2); free(Q); free(T); PLASMA_Finalize(); return EXIT_SUCCESS; }
int testing_zcposv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 4){ USAGE("CPOSV", "N LDA NRHS LDB", " - N : the size of the matrix\n" " - LDA : leading dimension of the matrix A\n" " - NRHS : number of RHS\n" " - LDB : leading dimension of the RHS B\n"); return -1; } int N = atoi(argv[0]); int LDA = atoi(argv[1]); int NRHS = atoi(argv[2]); int LDB = atoi(argv[3]); int ITER; double eps; int uplo; int info; int info_solution = 0; /*, info_factorization;*/ int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDA*N *sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDA*N *sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDB*NRHS*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *WORK = (PLASMA_Complex64_t *)malloc(2*LDA *sizeof(PLASMA_Complex64_t)); double *D = (double *)malloc(LDA*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A1) || (!A2) || (!B1) || (!B2) ){ printf("Out of Memory \n "); exit(0); } eps = LAPACKE_dlamch_work('e'); /*------------------------------------------------------------- * TESTING ZCPOSV */ /* Initialize A1 and A2 for Symmetric Positif Matrix (Hessenberg in the complex case) */ LAPACKE_dlarnv_work(IONE, ISEED, LDA, D); zlaghe(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); free(D); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i] + N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; printf("\n"); printf("------ TESTS FOR PLASMA ZCPOSV ROUTINE ------ \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each 
test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* PLASMA ZCPOSV */ uplo = PlasmaLower; info = PLASMA_zcposv(uplo, N, NRHS, A2, LDA, B1, LDB, B2, LDB, &ITER); if (info != PLASMA_SUCCESS ) { printf("PLASMA_zcposv is not completed: info = %d\n", info); info_solution = 1; } else { printf(" Solution obtained with %d iterations\n", ITER); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); } if (info_solution == 0){ printf("***************************************************\n"); printf(" ---- TESTING ZCPOSV ..................... PASSED !\n"); printf("***************************************************\n"); } else{ printf("***************************************************\n"); printf(" - TESTING ZCPOSV .. FAILED !\n"); printf("***************************************************\n"); } free(A1); free(A2); free(B1); free(B2); free(WORK); return 0; }
/*
 * Timing driver for PLASMA_zgesv_Tile on an n-by-n random matrix with nrhs
 * random right-hand sides.
 *
 * iparam : integer run parameters (TIMING_N, TIMING_NB, TIMING_NRHS,
 *          TIMING_CHECK, TIMING_THRDNBR, TIMING_SCHEDULER, TIMING_IB).
 * dparam : when TIMING_CHECK is set, receives the z_check_solution result
 *          (TIMING_RES) and the norms it reports (TIMING_ANORM,
 *          TIMING_BNORM, TIMING_XNORM).
 * t_     : receives the elapsed time of the PLASMA_zgesv_Tile call only.
 *
 * Returns 0 on success and -1 when an allocation fails. PLASMA is
 * finalized and all buffers are released on every path.
 */
static int RunTest(int *iparam, double *dparam, real_Double_t *t_)
{
    PLASMA_Complex64_t *AT, *bT, *x = NULL;
    PLASMA_Complex64_t *A = NULL;
    PLASMA_Complex64_t *b = NULL;
    PLASMA_desc *descA, *descB;
    real_Double_t t;
    int *piv;
    int rc = 0;
    int n     = iparam[TIMING_N];
    int nb    = iparam[TIMING_NB];
    int nrhs  = iparam[TIMING_NRHS];
    int check = iparam[TIMING_CHECK];
    int lda = n;
    int ldb = n;
    /* Byte counts are computed in size_t to avoid int overflow on large inputs. */
    size_t a_bytes = (size_t)lda*(size_t)n   *sizeof(PLASMA_Complex64_t);
    size_t b_bytes = (size_t)ldb*(size_t)nrhs*sizeof(PLASMA_Complex64_t);

    /* Initialize Plasma */
    PLASMA_Init( iparam[TIMING_THRDNBR] );
    if ( iparam[TIMING_SCHEDULER] )
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING );
    else
        PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING );

    /* Autotuning is always disabled: tile sizes are taken from iparam. */
    PLASMA_Disable(PLASMA_AUTOTUNING);
    PLASMA_Set(PLASMA_TILE_SIZE,        iparam[TIMING_NB] );
    PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] );

    /* Allocate Data */
    AT  = (PLASMA_Complex64_t *)malloc(a_bytes);
    bT  = (PLASMA_Complex64_t *)malloc(b_bytes);
    piv = (int *)malloc( (size_t)n*sizeof(int));

    /* Check if unable to allocate memory */
    if ( (!AT) || (!bT) || (!piv) ) {
        printf("Out of Memory \n ");
        rc = -1;
        goto out_buffers;
    }

    /* Wrap AT and bT in tile descriptors and fill them with random values */
    PLASMA_Desc_Create(&descA, AT, PlasmaComplexDouble, nb, nb, nb*nb, lda, n,    0, 0, n, n);
    PLASMA_Desc_Create(&descB, bT, PlasmaComplexDouble, nb, nb, nb*nb, ldb, nrhs, 0, 0, n, nrhs);
    LAPACKE_zlarnv_work(1, ISEED, lda*n,    AT);
    LAPACKE_zlarnv_work(1, ISEED, ldb*nrhs, bT);

    /* Save AT and bT in lapack layout for check */
    if ( check ) {
        A = (PLASMA_Complex64_t *)malloc(a_bytes);
        b = (PLASMA_Complex64_t *)malloc(b_bytes);
        if ( (!A) || (!b) ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto out_desc;
        }
        PLASMA_zTile_to_Lapack(descA, (void*)A, lda);
        PLASMA_zTile_to_Lapack(descB, (void*)b, ldb);
    }

    t = -cWtime();
    PLASMA_zgesv_Tile( descA, piv, descB );
    t += cWtime();
    *t_ = t;

    /* Check the solution */
    if ( check ) {
        x = (PLASMA_Complex64_t *)malloc(b_bytes);
        if ( !x ) {
            printf("Out of Memory \n ");
            rc = -1;
            goto out_desc;
        }
        PLASMA_zTile_to_Lapack(descB, (void*)x, n);

        dparam[TIMING_RES] = z_check_solution(n, n, nrhs, A, lda, b, x, ldb,
                                              &(dparam[TIMING_ANORM]),
                                              &(dparam[TIMING_BNORM]),
                                              &(dparam[TIMING_XNORM]));
    }

out_desc:
    /* free(NULL) is a no-op, so these are safe when checking is disabled. */
    free(A);
    free(b);
    free(x);
    PLASMA_Desc_Destroy(&descA);
    PLASMA_Desc_Destroy(&descB);

out_buffers:
    free( AT );
    free( bT );
    free( piv );
    PLASMA_Finalize();

    return rc;
}
int testing_zgemm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 8) { USAGE("GEMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - M : number of rows of matrices A and C\n" " - N : number of columns of matrices B and C\n" " - K : number of columns of matrix A / number of rows of matrix B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); PLASMA_Complex64_t beta = (PLASMA_Complex64_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int K = atoi(argv[4]); int LDA = atoi(argv[5]); int LDB = atoi(argv[6]); int LDC = atoi(argv[7]); double eps; int info_solution; int i, j, ta, tb; int LDAxK = LDA*max(M,K); int LDBxN = LDB*max(K,N); int LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxK*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZGEMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZGEMM */ /* Initialize A, B, C */ 
LAPACKE_zlarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B); LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C); #ifdef COMPLEX for (ta=0; ta<3; ta++) { for (tb=0; tb<3; tb++) { #else for (ta=0; ta<2; ta++) { for (tb=0; tb<2; tb++) { #endif for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA ZGEMM */ PLASMA_zgemm(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZGEMM (%s, %s) ............... PASSED !\n", transstr[ta], transstr[tb]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZGEMM (%s, %s) ... FAILED !\n", transstr[ta], transstr[tb]); printf("************************************************\n"); } } } #ifdef _UNUSED_ }} #endif free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }