void CORE_cgetrf_reclap_init(void) { int i; for (i = 0; i < AMAX1BUF_SIZE; ++i) CORE_camax1buf[i] = -1.0; sfmin = LAPACKE_slamch_work('S'); }
int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo) { float Anorm, Rnorm; PLASMA_Complex32_t alpha; int info_factorization; int i,j; float eps; eps = LAPACKE_slamch_work('e'); PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t)); alpha= 1.0; LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ if (uplo == PlasmaUpper){ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } else{ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } /* Compute the Residual || A -L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB, float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; float eps; PLASMA_Complex32_t beta_const; float *work = (float *)malloc(max(N, K)* sizeof(float)); beta_const = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_cher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CHER2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(max(M, N)* sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); if (M >= N) { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution, NrowA; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; float eps; PLASMA_Complex32_t beta_const; float result; float *work = (float *)malloc(max(M, N)* sizeof(float)); beta_const = (PLASMA_Complex32_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_caxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB ) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(N*sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); /* Initialize A1 and A2 for Symmetric Positive Matrix */ alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
int check_solution(int N, int NRHS, float *A1, int LDA, float *B1, float *B2, int LDB) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; float alpha, beta; float *work = (float *)malloc(N*sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB); Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if ( isnan(Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)) || (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0) ){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
float LAPACKE_slamch( char cmach ) { return LAPACKE_slamch_work( cmach ); }
int testing_cher2k(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("HER2K", "alpha beta M N LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - N : number of columns and rows of matrix C and number of row of matrix A and B\n" " - K : number of columns of matrix A and B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]); float beta = (float) atol(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int NKmax = max(N, K); int NminusOne = N - 1; float eps; int info_solution; int info, u, t; size_t LDAxK = LDA*NKmax; size_t LDBxK = LDB*NKmax; size_t LDCxN = LDC*N; PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxK*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxK*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDC*sizeof(PLASMA_Complex32_t)); float *D = (float *) malloc(LDC *sizeof(float)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Cinit) || (!Cfinal) || (!D) ){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA CHER2K ROUTINE ------- \n"); printf(" Size of the Matrix C %d by %d\n", N, K); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING CHER2K */ /* Initialize A,B */ LAPACKE_clarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_clarnv_work(IONE, ISEED, LDBxK, B); /* Initialize C */ LAPACKE_slarnv_work(IONE, ISEED, LDC, D); claghe(&N, &NminusOne, D, C, &LDC, ISEED, WORK, &info); free(D); free(WORK); for (u=0; u<2; u++) { for (t=0; t<3; t++) { if (trans[t] == PlasmaTrans) continue; memcpy(Cinit, C, LDCxN*sizeof(PLASMA_Complex32_t)); memcpy(Cfinal, C, LDCxN*sizeof(PLASMA_Complex32_t)); /* PLASMA CHER2K */ PLASMA_cher2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING CHER2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING CHER2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int testing_strsm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 5 ) { USAGE("TRSM", "alpha M N LDA LDB", " - alpha : alpha coefficient\n" " - M : number of rows of matrices B\n" " - N : number of columns of matrices B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n"); return -1; } float alpha = (float) atol(argv[0]); int M = atoi(argv[1]); int N = atoi(argv[2]); int LDA = atoi(argv[3]); int LDB = atoi(argv[4]); float eps; int info_solution; int s, u, t, d, i; int LDAxM = LDA*max(M,N); int LDBxN = LDB*max(M,N); float *A = (float *)malloc(LDAxM*sizeof(float)); float *B = (float *)malloc(LDBxN*sizeof(float)); float *Binit = (float *)malloc(LDBxN*sizeof(float)); float *Bfinal = (float *)malloc(LDBxN*sizeof(float)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Binit) || (!Bfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA STRSM ROUTINE ------- \n"); printf(" Size of the Matrix B : %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING STRSM */ /* Initialize A, B, C */ LAPACKE_slarnv_work(IONE, ISEED, LDAxM, A); LAPACKE_slarnv_work(IONE, ISEED, LDBxN, B); for(i=0;i<max(M,N);i++) A[LDA*i+i] = A[LDA*i+i] + 2.0; for (s=0; s<2; s++) { for (u=0; u<2; u++) { #ifdef COMPLEX for (t=0; t<3; t++) { #else for (t=0; t<2; t++) { #endif for (d=0; d<2; d++) { memcpy(Binit, B, LDBxN*sizeof(float)); memcpy(Bfinal, B, LDBxN*sizeof(float)); /* PLASMA STRSM */ PLASMA_strsm(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Bfinal, LDB); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Binit, Bfinal, LDB); printf("***************************************************\n"); if (info_solution == 0) { printf(" ---- TESTING STRSM (%s, %s, %s, %s) ...... PASSED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } else { printf(" ---- TESTING STRSM (%s, %s, %s, %s) ... FAILED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } printf("***************************************************\n"); } } } } free(A); free(B); free(Binit); free(Bfinal); return 0; } /*-------------------------------------------------------------- * Check the solution */ static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, int M, int N, float alpha, float *A, int LDA, float *Bref, float *Bplasma, int LDB) { int info_solution; float Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result; float eps; float mzone = (float)-1.0; float *work = (float *)malloc(max(M, N)* sizeof(float)); int Am, An; if (side == PlasmaLeft) { Am = M; An = M; } else { Am = N; An = N; } Anorm = LAPACKE_slantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag), Am, An, A, LDA, work); Binitnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); Bplasmanorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work); cblas_strsm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB); Blapacknorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); cblas_saxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1); Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n", Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm); result = Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps); printf("============\n"); printf("Checking the norm of the difference against reference STRSM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result); if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int testing_csymm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient \n" " - beta : beta coefficient \n" " - M : number of rows of matrices A and C \n" " - N : number of columns of matrices B and C \n" " - LDA : leading dimension of matrix A \n" " - LDB : leading dimension of matrix B \n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex32_t alpha = (PLASMA_Complex32_t) atol(argv[0]); PLASMA_Complex32_t beta = (PLASMA_Complex32_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int MNmax = max(M, N); int MminusOne = MNmax - 1; float eps; int info_solution; int i, j, s, u, info; int LDAxM = LDA*max(M, N); int LDBxN = LDB*N; int LDCxN = LDC*N; PLASMA_Complex32_t *A = (PLASMA_Complex32_t *)malloc(LDAxM*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *B = (PLASMA_Complex32_t *)malloc(LDBxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *C = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cinit = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *Cfinal = (PLASMA_Complex32_t *)malloc(LDCxN*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *WORK = (PLASMA_Complex32_t *)malloc(2*LDC*sizeof(PLASMA_Complex32_t)); float *D = (float *) malloc(LDC *sizeof(float)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA CSYMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING CSYMM */ /* Initialize A */ LAPACKE_slarnv_work(IONE, ISEED, LDC, D); claghe(&MNmax, &MminusOne, D, A, &LDA, ISEED, WORK, &info); free(D); free(WORK); /* Initialize B */ LAPACKE_clarnv_work(IONE, ISEED, LDBxN, B); /* Initialize C */ LAPACKE_clarnv_work(IONE, ISEED, LDCxN, C); for (s=0; s<2; s++) { for (u=0; u<2; u++) { /* Initialize Cinit / Cfinal */ for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA CSYMM */ PLASMA_csymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING CSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING CSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int testing_ssygst(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGST", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } float eps = LAPACKE_slamch_work('e'); int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int info_transformation, info_factorization; int i, u; int LDAxN = LDA*N; int LDBxN = LDB*N; float *A1 = (float *)malloc(LDAxN*sizeof(float)); float *A2 = (float *)malloc(LDAxN*sizeof(float)); float *B1 = (float *)malloc(LDBxN*sizeof(float)); float *B2 = (float *)malloc(LDBxN*sizeof(float)); float *Ainit = (float *)malloc(LDAxN*sizeof(float)); float *Binit = (float *)malloc(LDBxN*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /*---------------------------------------------------------- * TESTING SSYGST */ /* Initialize A1 and A2 */ PLASMA_splgsy(0., N, A1, LDA, 5198); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and B2 */ PLASMA_splgsy((float)N, N, B1, LDB, 4231); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA SSYGST ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrices A and B are randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING SSYGST */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { memcpy(A2, Ainit, LDAxN*sizeof(float)); memcpy(B2, Binit, LDBxN*sizeof(float)); PLASMA_spotrf(uplo[u], N, B2, LDB); PLASMA_ssygst(itype[i], uplo[u], N, A2, LDA, B2, LDB); /* Check the Cholesky factorization and the transformation */ info_factorization = check_factorization(N, B1, B2, LDB, uplo[u], eps); info_transformation = check_transformation(itype[i], uplo[u], N, A1, A2, LDA, B2, LDB, eps); if ( (info_transformation == 0) && (info_factorization == 0) ) { printf("***************************************************\n"); printf(" ---- TESTING SSYGST (%s, %s) ....... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING SSYGST (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } free(A1); free(A2); free(B1); free(B2); free(Ainit); free(Binit); return 0; }
int testing_sgemm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 8) { USAGE("GEMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - M : number of rows of matrices A and C\n" " - N : number of columns of matrices B and C\n" " - K : number of columns of matrix A / number of rows of matrix B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } float alpha = (float) atol(argv[0]); float beta = (float) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int K = atoi(argv[4]); int LDA = atoi(argv[5]); int LDB = atoi(argv[6]); int LDC = atoi(argv[7]); float eps; int info_solution; int i, j, ta, tb; int LDAxK = LDA*max(M,K); int LDBxN = LDB*max(K,N); int LDCxN = LDC*N; float *A = (float *)malloc(LDAxK*sizeof(float)); float *B = (float *)malloc(LDBxN*sizeof(float)); float *C = (float *)malloc(LDCxN*sizeof(float)); float *Cinit = (float *)malloc(LDCxN*sizeof(float)); float *Cfinal = (float *)malloc(LDCxN*sizeof(float)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_slamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA SGEMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING SGEMM */ /* Initialize A, B, C */ LAPACKE_slarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_slarnv_work(IONE, ISEED, LDBxN, B); LAPACKE_slarnv_work(IONE, ISEED, LDCxN, C); #ifdef COMPLEX for (ta=0; ta<3; ta++) { for (tb=0; tb<3; tb++) { #else for (ta=0; ta<2; ta++) { for (tb=0; tb<2; tb++) { #endif for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA SGEMM */ PLASMA_sgemm(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING SGEMM (%s, %s) ............... PASSED !\n", transstr[ta], transstr[tb]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING SGEMM (%s, %s) ... FAILED !\n", transstr[ta], transstr[tb]); printf("************************************************\n"); } } } #ifdef _UNUSED_ }} #endif free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, float alpha, float *A, int LDA, float *B, int LDB, float beta, float *Cref, float *Cplasma, int LDC) { int info_solution; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; float eps; float beta_const; float *work = (float *)malloc(max(K,max(M, N))* sizeof(float)); int Am, An, Bm, Bn; beta_const = -1.0; if (transA == PlasmaNoTrans) { Am = M; An = K; } else { Am = K; An = M; } if (transB == PlasmaNoTrans) { Bm = K; Bn = N; } else { Bm = N; Bn = K; } Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A, LDA, work); Bnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B, LDB, work); Cinitnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_sgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, (alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_saxpy(LDC * N, (beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference SGEMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }