void CORE_ctrtri(int uplo, int diag, int N, PLASMA_Complex32_t *A, int LDA, int *info) { *info = LAPACKE_ctrtri_work( LAPACK_COL_MAJOR, lapack_const(uplo), lapack_const(diag), N, A, LDA); }
int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo) { float Anorm, Rnorm; PLASMA_Complex32_t alpha; int info_factorization; int i,j; float eps; eps = LAPACKE_slamch_work('e'); PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t)); alpha= 1.0; LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ if (uplo == PlasmaUpper){ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } else{ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } /* Compute the Residual || A -L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
void CORE_slansy(int norm, int uplo, int N, float *A, int LDA, float *work, float *normA) { *normA = LAPACKE_slansy_work( LAPACK_COL_MAJOR, lapack_const(norm), lapack_const(uplo), N, A, LDA, work); }
void CORE_zlanhe(int norm, int uplo, int N, PLASMA_Complex64_t *A, int LDA, double *work, double *normA) { *normA = LAPACKE_zlanhe_work( LAPACK_COL_MAJOR, lapack_const(norm), lapack_const(uplo), N, A, LDA, work); }
int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(max(M, N)* sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); if (M >= N) { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, float alpha, PLASMA_Complex32_t *A, int LDA, float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution; float Anorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; float eps; PLASMA_Complex32_t beta_const; float result; float *work = (float *)malloc(max(N, K)* sizeof(float)); beta_const = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_cherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, (alpha), A, LDA, (beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CHERK \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_solution(int M, int N, int NRHS, double *A1, int LDA, double *B1, double *B2, int LDB) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm; double alpha, beta; double *work = (double *)malloc(max(M, N)* sizeof(double)); double eps; eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB); if (M >= N) { double *Residual = (double *)malloc(M*NRHS*sizeof(double)); memset((void*)Residual, 0, M*NRHS*sizeof(double)); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, M); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { double *Residual = (double *)malloc(N*NRHS*sizeof(double)); memset((void*)Residual, 0, N*NRHS*sizeof(double)); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, N); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, int uplo, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_factorization; int i,j; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L1 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *L2 = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex64_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex64_t)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N); if (uplo == PlasmaUpper) cblas_ztrmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); else cblas_ztrmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); /* Compute the Residual || A - L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / ( Anorm * N * eps ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
static int check_solution(int M, int N, int NRHS, double *A1, int LDA, double *B1, double *B2, int LDB, double eps) { double alpha = 1.0; double beta = 0.0; double *Residual; int info_solution; int maxMN = max(M, N); double Rnorm, Anorm, Xnorm, Bnorm, result; double *work = (double *)malloc( max(maxMN, NRHS)* sizeof(double)); Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB); Residual = (double *)malloc(maxMN*NRHS*sizeof(double)); memset((void*)Residual, 0, maxMN*NRHS*sizeof(double)); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, maxMN); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), maxMN, NRHS, Residual, maxMN, work); free(Residual); free(work); if (getenv("PLASMA_TESTING_VERBOSE")) printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm ); result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ; printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result); if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } return info_solution; }
/*------------------------------------------------------------------------ * Check the accuracy of the condition estimator */ static int check_estimator(PLASMA_enum uplo, int N, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *A2, double Anorm, double Acond, double eps) { int info_solution; double result, Acond_lapack; double invcond, invcond_lapack; info_solution = LAPACKE_zpocon(LAPACK_COL_MAJOR, lapack_const(uplo), N, A2, LDA, Anorm, &Acond_lapack); if ( info_solution != 0 ) { printf(" PLASMA_zgecon returned info = %d\n", info_solution ); return info_solution; } invcond_lapack = 1. / ( Acond_lapack ); invcond = 1. / ( Acond ); printf("============\n"); printf("Checking the condition number \n"); printf("-- Acond_plasma = %e, Acond_lapack = %e \n" "-- Ainvcond_plasma = %e, Ainvcond_lapack = %e \n", Acond, Acond_lapack, invcond, invcond_lapack ); result = fabs( Acond_lapack - Acond ) / eps; if ( result > 60. ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } return info_solution; }
void CORE_slansy_quark(Quark *quark) { float *normA; int norm; int uplo; int N; float *A; int LDA; float *work; quark_unpack_args_7(quark, normA, norm, uplo, N, A, LDA, work); *normA = LAPACKE_slansy_work( LAPACK_COL_MAJOR, lapack_const(norm), lapack_const(uplo), N, A, LDA, work); }
void CORE_zlanhe_quark(Quark *quark) { double *normA; int norm; int uplo; int N; PLASMA_Complex64_t *A; int LDA; double *work; quark_unpack_args_7(quark, normA, norm, uplo, N, A, LDA, work); *normA = LAPACKE_zlanhe_work( LAPACK_COL_MAJOR, lapack_const(norm), lapack_const(uplo), N, A, LDA, work); }
void CORE_zhegst(int itype, PLASMA_enum uplo, int N, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, int *INFO) { *INFO = LAPACKE_zhegst_work( LAPACK_COL_MAJOR, itype, lapack_const(uplo), N, A, LDA, B, LDB ); }
void CORE_ssygst(int itype, PLASMA_enum uplo, int N, float *A, int LDA, float *B, int LDB, int *INFO) { *INFO = LAPACKE_ssygst_work( LAPACK_COL_MAJOR, itype, lapack_const(uplo), N, A, LDA, B, LDB ); }
void CORE_clange(int norm, int M, int N, PLASMA_Complex32_t *A, int LDA, float *work, float *normA) { *normA = LAPACKE_clange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
void CORE_clacpy(PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB) { LAPACKE_clacpy_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N, A, LDA, B, LDB); }
void CORE_dlange(int norm, int M, int N, double *A, int LDA, double *work, double *normA) { *normA = LAPACKE_dlange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
void CORE_ctrtri_quark(Quark *quark) { int uplo; int diag; int N; PLASMA_Complex32_t *A; int LDA; PLASMA_sequence *sequence; PLASMA_request *request; int iinfo; int info; quark_unpack_args_8(quark, uplo, diag, N, A, LDA, sequence, request, iinfo); info = LAPACKE_ctrtri_work( LAPACK_COL_MAJOR, lapack_const(uplo), lapack_const(diag), N, A, LDA); if ((sequence->status == PLASMA_SUCCESS) && (info != 0)) plasma_sequence_flush(quark, sequence, request, iinfo + info); }
void CORE_zlaset2(PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA) { if (uplo == PlasmaUpper) { LAPACKE_zlaset_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N-1, alpha, alpha, A+LDA, LDA); } else if (uplo == PlasmaLower) { LAPACKE_zlaset_work( LAPACK_COL_MAJOR, lapack_const(uplo), M-1, N, alpha, alpha, A+1, LDA); } else { LAPACKE_zlaset_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N, alpha, alpha, A, LDA); } }
static int check_solution(int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB, double eps ) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm, result; PLASMA_Complex64_t alpha, beta; double *work = (double *)malloc(N*sizeof(double)); alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); if (getenv("PLASMA_TESTING_VERBOSE")) printf( "||A||_oo=%f\n||X||_oo=%f\n||B||_oo=%f\n||A X - B||_oo=%e\n", Anorm, Xnorm, Bnorm, Rnorm ); result = Rnorm / ( (Anorm*Xnorm+Bnorm)*N*eps ) ; printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n", result); if ( isnan(Xnorm) || isinf(Xnorm) || isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB ) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(N*sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); /* Initialize A1 and A2 for Symmetric Positive Matrix */ alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
int check_solution(int N, int NRHS, float *A1, int LDA, float *B1, float *B2, int LDB) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; float alpha, beta; float *work = (float *)malloc(N*sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB); Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if ( isnan(Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)) || (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0) ){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
void CORE_dlange_quark(Quark *quark) { double *normA; int norm; int M; int N; double *A; int LDA; double *work; quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA); *normA = LAPACKE_dlange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
void CORE_clacpy_quark(Quark *quark) { PLASMA_enum uplo; int M; int N; PLASMA_Complex32_t *A; int LDA; PLASMA_Complex32_t *B; int LDB; quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB); LAPACKE_clacpy_work( LAPACK_COL_MAJOR, lapack_const(uplo), M, N, A, LDA, B, LDB); }
void CORE_clange_quark(Quark *quark) { float *normA; int norm; int M; int N; PLASMA_Complex32_t *A; int LDA; float *work; quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA); *normA = LAPACKE_clange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *Cref, double *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; double beta_const; double *work = (double *)malloc(max(N, K)* sizeof(double)); beta_const = -1.0; Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_dsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, (alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_daxpy(LDC*N, (beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference DSYR2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_orthogonality(int M, int N, int LDQ, double *Q) { double alpha, beta; double normQ; int info_ortho; int i; int minMN = min(M, N); double eps; double *work = (double *)malloc(minMN*sizeof(double)); eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; /* Build the idendity matrix USE DLASET?*/ double *Id = (double *) malloc(minMN*minMN*sizeof(double)); memset((void*)Id, 0, minMN*minMN*sizeof(double)); for (i = 0; i < minMN; i++) Id[i*minMN+i] = (double)1.0; /* Perform Id - Q'Q */ if (M >= N) cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N); else cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'u', minMN, Id, minMN, work); printf("============\n"); printf("Checking the orthogonality of Q \n"); printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps)); if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) { printf("-- Orthogonality is suspicious ! \n"); info_ortho=1; } else { printf("-- Orthogonality is CORRECT ! \n"); info_ortho=0; } free(work); free(Id); return info_ortho; }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution, NrowA; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; double eps; PLASMA_Complex64_t beta_const; double result; double *work = (double *)malloc(max(M, N)* sizeof(double)); beta_const = (PLASMA_Complex64_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
/*------------------------------------------------------------------- * Check the orthogonality of Q */ static int check_orthogonality(int M, int N, double *Q, int LDQ, double eps) { double alpha = 1.0; double beta = -1.0; double normQ, result; int info_ortho; int minMN = min(M, N); double *work = (double *)malloc(minMN*sizeof(double)); /* Build the idendity matrix */ double *Id = (double *) malloc(minMN*minMN*sizeof(double)); LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN); /* Perform Id - Q'Q */ if (M >= N) cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N); else cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'U', minMN, Id, minMN, work); result = normQ / (minMN * eps); printf("============\n"); printf("Checking the orthogonality of Q \n"); printf("||Id-Q'*Q||_oo / (minMN*eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Orthogonality is suspicious ! \n"); info_ortho=1; } else { printf("-- Orthogonality is CORRECT ! \n"); info_ortho=0; } free(work); free(Id); return info_ortho; }
void CORE_zhegst_quark(Quark *quark) { int itype; PLASMA_enum uplo; int n; PLASMA_Complex64_t *A; int lda; PLASMA_Complex64_t *B; int ldb; PLASMA_sequence *sequence; PLASMA_request *request; int iinfo; int info; quark_unpack_args_10(quark, itype, uplo, n, A, lda, B, ldb, sequence, request, iinfo); info = LAPACKE_zhegst_work( LAPACK_COL_MAJOR, itype, lapack_const(uplo), n, A, lda, B, ldb); if (sequence->status == PLASMA_SUCCESS && info != 0) plasma_sequence_flush(quark, sequence, request, iinfo+info); }