/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_factorization; int i,j; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L1 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L2 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N); if (uplo == PlasmaUpper) cblas_ztrmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); else cblas_ztrmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); /* Compute the Residual || A - L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / ( Anorm * N * eps ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
int check_solution(int M, int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex64_t alpha, beta; double *work = (double *)malloc(max(M, N)* sizeof(double)); double eps; eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); if (M >= N) { PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(M*NRHS*sizeof(PLASMA_Complex64_t)); memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex64_t)); cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*NRHS*sizeof(PLASMA_Complex64_t)); memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex64_t)); cblas_zgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static int check_solution(int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB, double eps ) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm, result; PLASMA_Complex64_t alpha, beta; double *work = (double *)malloc(N*sizeof(double)); alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); if (getenv("PLASMA_TESTING_VERBOSE")) printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm ); result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ; printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result); if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, double beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; PLASMA_Complex64_t beta_const; double *work = (double *)malloc(max(N, K)* sizeof(double)); beta_const = -1.0; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_zher2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_zaxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZHER2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution, NrowA; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; double eps; PLASMA_Complex64_t beta_const; double result; double *work = (double *)malloc(max(M, N)* sizeof(double)); beta_const = (PLASMA_Complex64_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
double LAPACKE_zlange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_double* a, lapack_int lda ) { lapack_int info = 0; double res = 0.; double* work = NULL; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_zlange", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_zge_nancheck( matrix_order, m, n, a, lda ) ) { return -5; } #endif /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'e' ) || LAPACKE_lsame( norm, 'f' ) ) { work = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } } /* Call middle-level interface */ res = LAPACKE_zlange_work( matrix_order, norm, m, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'e' ) || LAPACKE_lsame( norm, 'f' ) ) { LAPACKE_free( work ); } exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_zlange", info ); } return res; }
DLLEXPORT double z_matrix_norm(char norm, lapack_int m, lapack_int n, lapack_complex_double a[], double work[]) { return LAPACKE_zlange_work(CblasColMajor, norm, m, n, a, m, work); }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; PLASMA_Complex64_t *A, *A2 = NULL; real_Double_t t; int *ipiv, *ipiv2 = NULL; int i; int m = iparam[TIMING_N]; int n = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* Allocate Data */ A = (PLASMA_Complex64_t *)malloc(lda*n*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ( (! A) ) { printf("Out of Memory \n "); return -1; } /* Initialiaze Data */ LAPACKE_zlarnv_work(1, ISEED, lda*n, A); /* Allocate Workspace */ ipiv = (int *)malloc( n*sizeof(int) ); /* Save A in lapack layout for check */ if ( check ) { A2 = (PLASMA_Complex64_t *)malloc(lda*n*sizeof(PLASMA_Complex64_t)); ipiv2 = (int *)malloc( n*sizeof(int) ); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda); LAPACKE_zgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 ); } plasma = plasma_context_self(); PLASMA_Sequence_Create(&sequence); QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] ); plasma_dynamic_spawn(); CORE_zgetrf_reclap_init(); t = -cWtime(); QUARK_CORE_zgetrf_reclap(plasma->quark, &task_flags, m, n, n, A, lda, ipiv, sequence, &request, 0, 0, iparam[TIMING_THRDNBR]); PLASMA_Sequence_Wait(sequence); t += cWtime(); *t_ = t; PLASMA_Sequence_Destroy(sequence); /* Check the solution */ if ( check ) { double *work = (double *)malloc(max(m,n)*sizeof(double)); /* Check ipiv */ for(i=0; i<n; i++) { if( ipiv[i] != ipiv2[i] ) { fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n", i, ipiv[i], i, creal(A[ i * lda + i ]), i, ipiv2[i], i, creal(A2[ i * lda + i ])); break; } } dparam[TIMING_ANORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A, lda, work); dparam[TIMING_XNORM] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); dparam[TIMING_BNORM] = 0.0; CORE_zaxpy( m, n, -1.0, A, lda, A2, lda); dparam[TIMING_RES] = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); free( A2 ); free( ipiv2 ); free( work ); } free( A ); free( ipiv ); PLASMA_Finalize(); return 0; }
static int check_transformation(int itype, int uplo, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *B2, int LDB, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_transformation; int i, j; char *str; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Aorig = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N); /* Rebuild the symmetry of A2 */ if (uplo == PlasmaLower) { for (j = 0; j < N; j++) for (i = j+1; i < N; i++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } else { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } if (itype == 1) { if (uplo == PlasmaLower) { str = "L*A2*L'"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "U'*A2*U"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*A2*inv(L)"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*A2*inv(U')"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } /* Compute the Residual ( A1 - W ) */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the global transformation \n"); printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Transformation is suspicious ! \n"); info_transformation = 1; } else { printf("-- Transformation is CORRECT ! \n"); info_transformation = 0; } free(Residual); free(Aorig); free(work); return info_transformation; }
int check_factorization(int M, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *Q) { double Anorm, Rnorm; PLASMA_Complex64_t alpha, beta; int info_factorization; int i,j; double eps; eps = LAPACKE_dlamch_work('e'); PLASMA_Complex64_t *Ql = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(M*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(max(M,N)*sizeof(double)); alpha=1.0; beta=0.0; if (M >= N) { /* Extract the R */ PLASMA_Complex64_t *R = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); memset((void*)R, 0, N*N*sizeof(PLASMA_Complex64_t)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); /* Perform Ql=Q*R */ memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t)); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, CBLAS_SADDR(alpha), Q, LDA, R, N, CBLAS_SADDR(beta), Ql, M); free(R); } else { /* Extract the L */ PLASMA_Complex64_t *L = (PLASMA_Complex64_t *)malloc(M*M*sizeof(PLASMA_Complex64_t)); memset((void*)L, 0, M*M*sizeof(PLASMA_Complex64_t)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); /* Perform Ql=LQ */ memset((void*)Ql, 0, M*N*sizeof(PLASMA_Complex64_t)); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, CBLAS_SADDR(alpha), L, M, Q, LDA, CBLAS_SADDR(beta), Ql, M); free(L); } /* Compute the Residual */ for (i = 0; i < M; i++) for (j = 0 ; j < N; j++) Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work); if (M >= N) { printf("============\n"); printf("Checking the QR Factorization \n"); printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } else { printf("============\n"); printf("Checking the LQ Factorization \n"); printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) { printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else { printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(work); free(Ql); free(Residual); return info_factorization; }
static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; PLASMA_Complex64_t beta_const; double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); int Am, An, Bm, Bn; beta_const = -1.0; if (transA == PlasmaNoTrans) { Am = M; An = K; } else { Am = K; An = M; } if (transB == PlasmaNoTrans) { Bm = K; Bn = N; } else { Bm = N; Bn = K; } Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZGEMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }