int check_factorization(int N, PLASMA_Complex32_t *A1, PLASMA_Complex32_t *A2, int LDA, int uplo) { float Anorm, Rnorm; PLASMA_Complex32_t alpha; int info_factorization; int i,j; float eps; eps = LAPACKE_slamch_work('e'); PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L1 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); PLASMA_Complex32_t *L2 = (PLASMA_Complex32_t *)malloc(N*N*sizeof(PLASMA_Complex32_t)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(PLASMA_Complex32_t)); memset((void*)L2, 0, N*N*sizeof(PLASMA_Complex32_t)); alpha= 1.0; LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ if (uplo == PlasmaUpper){ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'u', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } else{ LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L1, N); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,'l', N, N, A2, LDA, L2, N); cblas_ctrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), L1, N, L2, N); } /* Compute the Residual || A -L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); if ( isnan(Rnorm/(Anorm*N*eps)) || (Rnorm/(Anorm*N*eps) > 10.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, float alpha, PLASMA_Complex32_t *A, int LDA, float beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution; float Anorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; float eps; PLASMA_Complex32_t beta_const; float result; float *work = (float *)malloc(max(N, K)* sizeof(float)); beta_const = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_cherk(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, (alpha), A, LDA, (beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_caxpy(LDC*N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CHERK \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_solution(int M, int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(max(M, N)* sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); if (M >= N) { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(M*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, M*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, M); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { PLASMA_Complex32_t *Residual = (PLASMA_Complex32_t *)malloc(N*NRHS*sizeof(PLASMA_Complex32_t)); memset((void*)Residual, 0, N*NRHS*sizeof(PLASMA_Complex32_t)); cblas_cgemm(CblasColMajor, CblasConjTrans, CblasNoTrans, N, NRHS, M, CBLAS_SADDR(alpha), A1, LDA, B1, LDB, CBLAS_SADDR(beta), Residual, N); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
void CORE_clange(int norm, int M, int N, PLASMA_Complex32_t *A, int LDA, float *work, float *normA) { *normA = LAPACKE_clange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
int check_solution(int N, int NRHS, PLASMA_Complex32_t *A1, int LDA, PLASMA_Complex32_t *B1, PLASMA_Complex32_t *B2, int LDB ) { int info_solution; float Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex32_t alpha, beta; float *work = (float *)malloc(N*sizeof(float)); float eps; eps = LAPACKE_slamch_work('e'); /* Initialize A1 and A2 for Symmetric Positive Matrix */ alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
void CORE_clange_quark(Quark *quark) { float *normA; int norm; int M; int N; PLASMA_Complex32_t *A; int LDA; float *work; quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA); *normA = LAPACKE_clange_work( LAPACK_COL_MAJOR, lapack_const(norm), M, N, A, LDA, work); }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex32_t alpha, PLASMA_Complex32_t *A, int LDA, PLASMA_Complex32_t *B, int LDB, PLASMA_Complex32_t beta, PLASMA_Complex32_t *Cref, PLASMA_Complex32_t *Cplasma, int LDC) { int info_solution, NrowA; float Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; float eps; PLASMA_Complex32_t beta_const; float result; float *work = (float *)malloc(max(M, N)* sizeof(float)); beta_const = (PLASMA_Complex32_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_csymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_caxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_slamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference CSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
float LAPACKE_clange( int matrix_order, char norm, lapack_int m, lapack_int n, const lapack_complex_float* a, lapack_int lda ) { lapack_int info = 0; float res = 0.; float* work = NULL; if( matrix_order != LAPACK_COL_MAJOR && matrix_order != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_clange", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_cge_nancheck( matrix_order, m, n, a, lda ) ) { return -5; } #endif /* Allocate memory for working array(s) */ if( LAPACKE_lsame( norm, 'e' ) || LAPACKE_lsame( norm, 'f' ) ) { work = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) ); if( work == NULL ) { info = LAPACK_WORK_MEMORY_ERROR; goto exit_level_0; } } /* Call middle-level interface */ res = LAPACKE_clange_work( matrix_order, norm, m, n, a, lda, work ); /* Release memory and exit */ if( LAPACKE_lsame( norm, 'e' ) || LAPACKE_lsame( norm, 'f' ) ) { LAPACKE_free( work ); } exit_level_0: if( info == LAPACK_WORK_MEMORY_ERROR ) { LAPACKE_xerbla( "LAPACKE_clange", info ); } return res; }
DLLEXPORT float c_matrix_norm(char norm, lapack_int m, lapack_int n, lapack_complex_float a[], float work[]) { return LAPACKE_clange_work(CblasColMajor, norm, m, n, a, m, work); }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { plasma_context_t *plasma; Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer; PLASMA_Complex32_t *A, *AT, *A2 = NULL; PLASMA_desc *descA; real_Double_t t; int *ipiv, *ipiv2 = NULL; int i; int nb = iparam[TIMING_NB]; int m = iparam[TIMING_N]; int n = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; PLASMA_sequence *sequence = NULL; PLASMA_request request = PLASMA_REQUEST_INITIALIZER; /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* Allocate Data */ A = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); AT = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); /* Check if unable to allocate memory */ if ( ( !AT ) || (! A) ) { printf("Out of Memory \n "); return -1; } /* Initialiaze Data */ LAPACKE_clarnv_work(1, ISEED, lda*n, A); /* for(i=0; i<n; i++) { */ /* A[i*lda+i] += (float)m; */ /* } */ PLASMA_Desc_Create(&descA, AT, PlasmaComplexFloat, nb, nb, nb*nb, lda, n, 0, 0, m, n); PLASMA_cLapack_to_Tile((void*)A, lda, descA); /* Allocate Workspace */ ipiv = (int *)malloc( n*sizeof(int) ); /* Save AT in lapack layout for check */ if ( check ) { A2 = (PLASMA_Complex32_t *)malloc(lda*n*sizeof(PLASMA_Complex32_t)); ipiv2 = (int *)malloc( n*sizeof(int) ); LAPACKE_clacpy_work(LAPACK_COL_MAJOR,' ', m, n, A, lda, A2, lda); LAPACKE_cgetrf_work(LAPACK_COL_MAJOR, m, n, A2, lda, ipiv2 ); } plasma = plasma_context_self(); PLASMA_Sequence_Create(&sequence); QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence); QUARK_Task_Flag_Set(&task_flags, TASK_THREAD_COUNT, iparam[TIMING_THRDNBR] ); plasma_dynamic_spawn(); CORE_cgetrf_rectil_init(); t = -cWtime(); QUARK_CORE_cgetrf_rectil(plasma->quark, &task_flags, *descA, AT, descA->mb*descA->nb, ipiv, sequence, &request, 0, 0, iparam[TIMING_THRDNBR]); PLASMA_Sequence_Wait(sequence); t += cWtime(); *t_ = t; PLASMA_Sequence_Destroy(sequence); /* Check the solution */ if ( check ) { float *work = (float *)malloc(max(m,n)*sizeof(float)); PLASMA_cTile_to_Lapack(descA, (void*)A, lda); /* Check ipiv */ for(i=0; i<n; i++) { if( ipiv[i] != ipiv2[i] ) { fprintf(stderr, "\nPLASMA (ipiv[%d] = %d, A[%d] = %e) / LAPACK (ipiv[%d] = %d, A[%d] = [%e])\n", i, ipiv[i], i, crealf(A[ i * lda + i ]), i, ipiv2[i], i, crealf(A2[ i * lda + i ])); break; } } dparam[TIMING_ANORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A, lda, work); dparam[TIMING_XNORM] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); dparam[TIMING_BNORM] = 0.0; CORE_caxpy( m, n, -1.0, A, lda, A2, lda); dparam[TIMING_RES] = LAPACKE_clange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaMaxNorm), m, n, A2, lda, work); free( A2 ); free( ipiv2 ); free( work ); } /* Deallocate Workspace */ PLASMA_Desc_Destroy(&descA); free( A ); free( AT ); free( ipiv ); PLASMA_Finalize(); return 0; }