lapack_int LAPACKE_dlacpy( int matrix_layout, char uplo, lapack_int m, lapack_int n, const double* a, lapack_int lda, double* b, lapack_int ldb ) { if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_dlacpy", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK if( LAPACKE_get_nancheck() ) { /* Optionally check input matrices for NaNs */ if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) { return -5; } } #endif return LAPACKE_dlacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); }
int check_factorization(int M, int N, double *A1, double *A2, int LDA, double *Q) { double Anorm, Rnorm; double alpha, beta; int info_factorization; int i,j; double eps; eps = LAPACKE_dlamch_work('e'); double *Ql = (double *)malloc(M*N*sizeof(double)); double *Residual = (double *)malloc(M*N*sizeof(double)); double *work = (double *)malloc(max(M,N)*sizeof(double)); alpha=1.0; beta=0.0; if (M >= N) { /* Extract the R */ double *R = (double *)malloc(N*N*sizeof(double)); memset((void*)R, 0, N*N*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); /* Perform Ql=Q*R */ memset((void*)Ql, 0, M*N*sizeof(double)); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M); free(R); } else { /* Extract the L */ double *L = (double *)malloc(M*M*sizeof(double)); memset((void*)L, 0, M*M*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); /* Perform Ql=LQ */ memset((void*)Ql, 0, M*N*sizeof(double)); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M); free(L); } /* Compute the Residual */ for (i = 0; i < M; i++) for (j = 0 ; j < N; j++) Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work); Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work); if (M >= N) { printf("============\n"); printf("Checking the QR Factorization \n"); printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } else { printf("============\n"); printf("Checking the LQ Factorization \n"); printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) { printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else { printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(work); free(Ql); free(Residual); return info_factorization; }
static int RunTest(int *iparam, double *dparam, real_Double_t *t_) { double *A, *b, *x; double *Acpy = NULL; double *bcpy = NULL; real_Double_t t; int *piv; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = n; int ldb = n; int iter = 0; /* Allocate Data */ A = (double *)malloc(lda*n* sizeof(double)); b = (double *)malloc(ldb*nrhs*sizeof(double)); x = (double *)malloc(ldb*nrhs*sizeof(double)); piv = (int *)malloc( n*sizeof(int)); /* Check if unable to allocate memory */ if ( (!A) || (!b) || (!x) || (!piv) ) { printf("Out of Memory \n "); return -1; } /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } */ /* Initialiaze Data */ LAPACKE_dlarnv_work(1, ISEED, lda*n, A); LAPACKE_dlarnv_work(1, ISEED, ldb*nrhs, b); /* Save A and b */ if (check) { Acpy = (double *)malloc(lda*n* sizeof(double)); bcpy = (double *)malloc(ldb*nrhs*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, n, A, lda, Acpy, lda); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,' ', n, nrhs, b, ldb, bcpy, ldb); } t = -cWtime(); PLASMA_dsgesv( n, nrhs, A, lda, piv, b, ldb, x, ldb, &iter ); t += cWtime(); *t_ = t; /* Check the solution */ if (check) { dparam[TIMING_RES] = d_check_solution(n, n, nrhs, Acpy, lda, bcpy, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); free(Acpy); free(bcpy); } free( piv ); free( x ); free( b ); free( A ); PLASMA_Finalize(); return 0; }
static inline int CORE_dpamm_w(PLASMA_enum side, PLASMA_enum trans, PLASMA_enum uplo, int M, int N, int K, int L, int vi2, int vi3, const double *A1, int LDA1, double *A2, int LDA2, const double *V, int LDV, double *W, int LDW) { /* * W = A1 + op(V) * A2 or W = A1 + A2 * op(V) */ int j; static double zone = 1.0; static double zzero = 0.0; if (side == PlasmaLeft) { if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { /* * W = A1 + V' * A2 */ /* W = A2_2 */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), L, N, &A2[K-L], LDA2, W, LDW); /* W = V_2' * W + V_1' * A2_1 (ge+tr, top L rows of V') */ if (L > 0) { /* W = V_2' * W */ cblas_dtrmm( CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, L, N, (zone), &V[vi2], LDV, W, LDW); /* W = W + V_1' * A2_1 */ if (K > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, L, N, K-L, (zone), V, LDV, A2, LDA2, (zone), W, LDW); } } /* W_2 = V_3' * A2: (ge, bottom M-L rows of V') */ if (M > L) { cblas_dgemm( CblasColMajor, (CBLAS_TRANSPOSE)trans, CblasNoTrans, (M-L), N, K, (zone), &V[vi3], LDV, A2, LDA2, (zzero), &W[L], LDW); } /* W = A1 + W */ for(j = 0; j < N; j++) { cblas_daxpy( M, (zone), &A1[LDA1*j], 1, &W[LDW*j], 1); } } else { printf("Left Upper/NoTrans & Lower/ConjTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } } else { //side right if (((trans == PlasmaTrans) && (uplo == CblasUpper)) || ((trans == PlasmaNoTrans) && (uplo == CblasLower))) { printf("Right Upper/ConjTrans & Lower/NoTrans not implemented yet\n"); return PLASMA_ERR_NOT_SUPPORTED; } else { /* * W = A1 + A2 * V */ if (L > 0) { /* W = A2_2 */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M, L, &A2[LDA2*(K-L)], LDA2, W, LDW); /* W = W * V_2 --> W = A2_2 * V_2 */ cblas_dtrmm( CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M, L, (zone), &V[vi2], LDV, W, LDW); /* W = W + A2_1 * V_1 */ if (K > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, L, K-L, (zone), A2, LDA2, V, LDV, (zone), W, LDW); } } /* W = W + A2 * V_3 */ if (N > L) { cblas_dgemm( CblasColMajor, CblasNoTrans, (CBLAS_TRANSPOSE)trans, M, N-L, K, (zone), A2, LDA2, &V[vi3], LDV, (zzero), &W[LDW*L], LDW); } /* W = A1 + W */ for (j = 0; j < N; j++) { cblas_daxpy( M, (zone), &A1[LDA1*j], 1, &W[LDW*j], 1); } } } return PLASMA_SUCCESS; }
int testing_dsygv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGV", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } double eps = LAPACKE_dlamch_work('e'); PLASMA_enum vec = PlasmaNoVec; int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int LDQ = LDA; int LDAxN = LDA*N; int LDBxN = LDB*N; int LDQxN = LDQ*N; int info_ortho = 0; int info_solution = 0; int info_reduction = 0; int i, u; double *A1 = (double *)malloc(LDAxN*sizeof(double)); double *A2 = (double *)malloc(LDAxN*sizeof(double)); double *B1 = (double *)malloc(LDBxN*sizeof(double)); double *B2 = (double *)malloc(LDBxN*sizeof(double)); double *Q = (double *)malloc(LDQxN*sizeof(double)); double *Ainit = (double *)malloc(LDAxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *W1 = (double *)malloc(N*sizeof(double)); double *W2 = (double *)malloc(N*sizeof(double)); double *work = (double *)malloc(3*N* sizeof(double)); PLASMA_desc *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /* PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, 120); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 20); */ PLASMA_Enable(PLASMA_WARNINGS); PLASMA_Enable(PLASMA_ERRORS); PLASMA_Alloc_Workspace_dsygv(N, N, &T); /*---------------------------------------------------------- * TESTING DSYGV */ /* Initialize A1 and Ainit */ PLASMA_dplgsy(0., N, A1, LDA, 5198); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and Binit */ PLASMA_dplgsy((double)N, N, B1, LDB, 4321 ); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA DSYGV ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING DSYGV */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA); memcpy(A2, Ainit, LDAxN*sizeof(double)); memcpy(B2, Binit, LDBxN*sizeof(double)); PLASMA_dsygv(itype[i], vec, uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ); /* Check the orthogonality, reduction and the eigen solutions */ if (vec == PlasmaVec) info_ortho = check_orthogonality(N, N, Q, LDA, eps); /* * WARNING: For now, Q is associated to Band tridiagonal reduction and * not to the final tridiagonal reduction, so we can not call the check */ if (0) info_reduction = check_reduction(itype[i], uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps); memcpy(A1, Ainit, LDAxN*sizeof(double)); memcpy(B1, Binit, LDBxN*sizeof(double)); LAPACKE_dsygv( LAPACK_COL_MAJOR, itype[i], lapack_const(vec), lapack_const(uplo[u]), N, A1, LDA, B1, LDB, W1); /*info_solution = check_solution(N, N, N, A1, LDA, B1, B2, LDB, eps);*/ info_solution = check_solution(N, W1, W2, eps); if ( (info_ortho == 0) & (info_reduction == 0) & (info_solution == 0)) { printf("***************************************************\n"); printf(" ---- TESTING DSYGV (%s, %s) ...................... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING DSYGV (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } PLASMA_Dealloc_Handle_Tile(&T); free(A1); free(A2); free(B1); free(B2); free(Q); free(Ainit); free(Binit); free(W1); free(W2); free(work); return 0; }
static int check_reduction(int itype, int uplo, int N, int bw, double *A1, double *A2, int LDA, double *B2, int LDB, double *Q, double eps ) { double alpha = 1.0; double beta = 0.0; double Anorm, Rnorm, result; int info_reduction; int i, j; char *str; double *Aorig = (double *)malloc(N*N*sizeof(double)); double *Residual = (double *)malloc(N*N*sizeof(double)); double *T = (double *)malloc(N*N*sizeof(double)); double *work = (double *)malloc(N*sizeof(double)); memset((void*)T, 0, N*N*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, ' ', N, N, A1, LDA, Residual, N); /* Rebuild the T */ LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, T, N); if (uplo == PlasmaLower) { /* Set the reflectors to 0 */ for (i = bw+1; i < N; i++) for (j = 0 ; (j < N) && (j < i-bw); j++) T[j*N+i] = 0.; /* Copy the lower part to the upper part to rebuild the symmetry */ for (i = 0; i < N; i++) for (j = 0 ; j < i; j++) T[i*N+j] = (T[j*N+i]); } else { /* Set the reflectors to 0 */ for (j = bw+1; j < N; j++) for (i = 0 ; (i < N) && (i < j-bw); i++) T[j*N+i] = 0.; /* Copy the upper part to the lower part to rebuild the symmetry */ for (i = 0; i < N; i++) for (j = i+1 ; j < N; j++) T[i*N+j] = (T[j*N+i]); } memset((void*)Aorig, 0, N*N*sizeof(double)); if (itype == 1) { if (uplo == PlasmaLower) { str = "L*Q*T*Q'*L'"; /* Compute Aorig=Q*T*Q' */ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, PlasmaUpperLower, N, N, T, N, Aorig, N); cblas_dtrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else { str = "U'*Q*T*Q'*U"; /* Compute Aorig=Q'*T*Q */ cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*Q*A2*Q'*inv(L)"; /* Compute Aorig=Q*T*Q' */ cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N ); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*Q*A2*Q'*inv(U')"; /* Compute Aorig=Q'*T*Q */ cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, (alpha), Q, LDA, T, N, (beta), Aorig, N); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, (alpha), Aorig, N, Q, LDA, (beta), T, N); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, T, N, Aorig, N); cblas_dtrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_dtrsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } /* Compute the Residual */ for (i = 0; i < N; i++) for (j = 0 ; j < N; j++) Residual[j*N+i] = A1[j*LDA+i]-Aorig[j*N+i]; Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A2, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the tridiagonal reduction \n"); printf("-- ||A-%s||_oo/(||A||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Reduction is suspicious ! \n"); info_reduction = 1; } else { printf("-- Reduction is CORRECT ! \n"); info_reduction = 0; } free(Aorig); free(Residual); free(T); free(work); return info_reduction; }