/*------------------------------------------------------------------------ * Check the factorization of the matrix A2 */ static int check_factorization(int N, float *A1, float *A2, int LDA, int uplo, float eps) { float alpha = 1.0; float Anorm, Rnorm, result; int info_factorization; int i,j; float *Residual = (float *)malloc(N*N*sizeof(float)); float *L1 = (float *)malloc(N*N*sizeof(float)); float *L2 = (float *)malloc(N*N*sizeof(float)); float *work = (float *)malloc(N*sizeof(float)); memset((void*)L1, 0, N*N*sizeof(float)); memset((void*)L2, 0, N*N*sizeof(float)); LAPACKE_slacpy_work(LAPACK_COL_MAJOR,' ', N, N, A1, LDA, Residual, N); /* Dealing with L'L or U'U */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L1, N); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, L2, N); if (uplo == PlasmaUpper) cblas_strmm(CblasColMajor, CblasLeft, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N); else cblas_strmm(CblasColMajor, CblasRight, (CBLAS_UPLO)uplo, CblasTrans, CblasNonUnit, N, N, (alpha), L1, N, L2, N); /* Compute the Residual || A - L'L|| */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = L2[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / ( Anorm * N * eps ); printf("============\n"); printf("Checking the Cholesky Factorization \n"); printf("-- ||L'L-A||_oo/(||A||_oo.N.eps) = %e \n", result); if ( isnan(result) || isinf(result) || (result > 60.0) ){ printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else{ printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(Residual); free(L1); free(L2); free(work); return info_factorization; }
lapack_int LAPACKE_slacpy( int matrix_layout, char uplo, lapack_int m, lapack_int n, const float* a, lapack_int lda, float* b, lapack_int ldb ) { if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { LAPACKE_xerbla( "LAPACKE_slacpy", -1 ); return -1; } #ifndef LAPACK_DISABLE_NAN_CHECK /* Optionally check input matrices for NaNs */ if( LAPACKE_sge_nancheck( matrix_layout, m, n, a, lda ) ) { return -5; } #endif return LAPACKE_slacpy_work( matrix_layout, uplo, m, n, a, lda, b, ldb ); }
static int RunTest(int *iparam, float *dparam, real_Double_t *t_) { float *A, *Acpy = NULL, *b, *x; real_Double_t t; int *piv; int m = iparam[TIMING_M]; int n = iparam[TIMING_N]; int nrhs = iparam[TIMING_NRHS]; int check = iparam[TIMING_CHECK]; int lda = m; int ldb = m; /* Allocate Data */ A = (float *)malloc(lda*n*sizeof(float)); piv = (int *)malloc( min(m, n) * sizeof(int)); /* Check if unable to allocate memory */ if ( !A || !piv ){ printf("Out of Memory \n "); return -1; } /* Initialize Plasma */ PLASMA_Init( iparam[TIMING_THRDNBR] ); if ( iparam[TIMING_SCHEDULER] ) PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_DYNAMIC_SCHEDULING ); else PLASMA_Set(PLASMA_SCHEDULING_MODE, PLASMA_STATIC_SCHEDULING ); /*if ( !iparam[TIMING_AUTOTUNING] ) {*/ PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, iparam[TIMING_NB] ); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, iparam[TIMING_IB] ); /* } else { */ /* PLASMA_Get(PLASMA_TILE_SIZE, &iparam[TIMING_NB] ); */ /* PLASMA_Get(PLASMA_INNER_BLOCK_SIZE, &iparam[TIMING_IB] ); */ /* } */ /* Initialize Data */ /*LAPACKE_slarnv_work(1, ISEED, n*lda, A);*/ PLASMA_splrnt(m, n, A, lda, 3456); /* Save AT in lapack layout for check */ if ( check && (m == n) ) { Acpy = (float *)malloc(lda*n*sizeof(float)); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', m, n, A, lda, Acpy, lda); } t = -cWtime(); PLASMA_sgetrf( m, n, A, lda, piv ); t += cWtime(); *t_ = t; /* Check the solution */ if ( check && (m == n) ) { b = (float *)malloc(ldb*nrhs *sizeof(float)); x = (float *)malloc(ldb*nrhs *sizeof(float)); LAPACKE_slarnv_work(1, ISEED, ldb*nrhs, x); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', n, nrhs, x, ldb, b, ldb); PLASMA_sgetrs( PlasmaNoTrans, n, nrhs, A, lda, piv, x, ldb ); dparam[TIMING_RES] = s_check_solution(m, n, nrhs, Acpy, lda, b, x, ldb, &(dparam[TIMING_ANORM]), &(dparam[TIMING_BNORM]), &(dparam[TIMING_XNORM])); free( Acpy ); free( b ); free( x ); } free( A ); free( piv ); PLASMA_Finalize(); return 0; }
int testing_ssygst(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGST", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } float eps = LAPACKE_slamch_work('e'); int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int info_transformation, info_factorization; int i, u; int LDAxN = LDA*N; int LDBxN = LDB*N; float *A1 = (float *)malloc(LDAxN*sizeof(float)); float *A2 = (float *)malloc(LDAxN*sizeof(float)); float *B1 = (float *)malloc(LDBxN*sizeof(float)); float *B2 = (float *)malloc(LDBxN*sizeof(float)); float *Ainit = (float *)malloc(LDAxN*sizeof(float)); float *Binit = (float *)malloc(LDBxN*sizeof(float)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /*---------------------------------------------------------- * TESTING SSYGST */ /* Initialize A1 and A2 */ PLASMA_splgsy(0., N, A1, LDA, 5198); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and B2 */ PLASMA_splgsy((float)N, N, B1, LDB, 4231); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA SSYGST ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrices A and B are randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING SSYGST */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { memcpy(A2, Ainit, LDAxN*sizeof(float)); memcpy(B2, Binit, LDBxN*sizeof(float)); PLASMA_spotrf(uplo[u], N, B2, LDB); PLASMA_ssygst(itype[i], uplo[u], N, A2, LDA, B2, LDB); /* Check the Cholesky factorization and the transformation */ info_factorization = check_factorization(N, B1, B2, LDB, uplo[u], eps); info_transformation = check_transformation(itype[i], uplo[u], N, A1, A2, LDA, B2, LDB, eps); if ( (info_transformation == 0) && (info_factorization == 0) ) { printf("***************************************************\n"); printf(" ---- TESTING SSYGST (%s, %s) ....... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING SSYGST (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } free(A1); free(A2); free(B1); free(B2); free(Ainit); free(Binit); return 0; }
static int check_transformation(int itype, int uplo, int N, float *A1, float *A2, int LDA, float *B2, int LDB, float eps) { float alpha = 1.0; float Anorm, Rnorm, result; int info_transformation; int i, j; char *str; float *Residual = (float *)malloc(N*N*sizeof(float)); float *Aorig = (float *)malloc(N*N*sizeof(float)); float *work = (float *)malloc(N*sizeof(float)); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N); LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N); /* Rebuild the symmetry of A2 */ if (uplo == PlasmaLower) { for (j = 0; j < N; j++) for (i = j+1; i < N; i++) Aorig[j+i*N] = (Aorig[i+j*N]); } else { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) Aorig[j+i*N] = (Aorig[i+j*N]); } if (itype == 1) { if (uplo == PlasmaLower) { str = "L*A2*L'"; cblas_strmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strmm(CblasColMajor, CblasRight, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "U'*A2*U"; cblas_strmm(CblasColMajor, CblasLeft, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*A2*inv(L)"; cblas_strsm(CblasColMajor, CblasLeft, CblasLower, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*A2*inv(U')"; cblas_strsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); cblas_strsm(CblasColMajor, CblasRight, CblasUpper, CblasTrans, CblasNonUnit, N, N, (alpha), B2, LDB, Aorig, N); } } /* Compute the Residual ( A1 - W ) */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_slange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the global transformation \n"); printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Transformation is suspicious ! \n"); info_transformation = 1; } else { printf("-- Transformation is CORRECT ! \n"); info_transformation = 0; } free(Residual); free(Aorig); free(work); return info_transformation; }
int CORE_stsrfb(int side, int trans, int direct, int storev, int M1, int N1, int M2, int N2, int K, float *A1, int LDA1, float *A2, int LDA2, float *V, int LDV, float *T, int LDT, float *WORK, int LDWORK) { static float zone = 1.0; static float mzone = -1.0; int j; /* Check input arguments */ if (M1 < 0) { coreblas_error(5, "Illegal value of M1"); return -5; } if (N1 < 0) { coreblas_error(6, "Illegal value of N1"); return -6; } if ( (M2 < 0) || ( (M2 != M1) && (side == PlasmaRight) ) ){ coreblas_error(7, "Illegal value of M2"); return -7; } if ( (N2 < 0) || ( (N2 != N1) && (side == PlasmaLeft) ) ){ coreblas_error(8, "Illegal value of N2"); return -8; } if (K < 0) { coreblas_error(9, "Illegal value of K"); return -9; } /* Quick return */ if ((M1 == 0) || (N1 == 0) || (M2 == 0) || (N2 == 0) || (K == 0)) return PLASMA_SUCCESS; if (storev == PlasmaColumnwise) { if (direct == PlasmaForward) { if (side == PlasmaLeft) { /* * B = A1 + V' * A2 */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N1, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans, K, N2, M2, (zone), V, LDV, A2, LDA2, (zone), WORK, LDWORK); /* * A2 = A2 - V*T*B -> B = T*B, A2 = A2 - V*B */ cblas_strmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2, K, (mzone), V, LDV, WORK, LDWORK, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < N1; j++) { cblas_saxpy( K, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } /* * Columnwise / Forward / Right */ else { /* * B = A1 + A2 * V */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M1, K, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, K, N2, (zone), A2, LDA2, V, LDV, (zone), WORK, LDWORK); /* * A2 = A2 - B*T*V' -> B = B*T, A2 = A2 - B*V' */ cblas_strmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, M2, N2, K, (mzone), WORK, LDWORK, V, LDV, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < K; j++) { cblas_saxpy( M1, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } } else { coreblas_error(3, "Not implemented (ColMajor / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } else { if (direct == PlasmaForward) { /* * Rowwise / Forward / Left */ if (side == PlasmaLeft) { /* * B = A1 + V * A2 */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), K, N1, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, K, N2, M2, (zone), V, LDV, A2, LDA2, (zone), WORK, LDWORK); /* * A2 = A2 - V'*T*B -> B = T*B, A2 = A2 - V'*B */ cblas_strmm( CblasColMajor, CblasLeft, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, K, N2, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasTrans, CblasNoTrans, M2, N2, K, (mzone), V, LDV, WORK, LDWORK, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j=0; j<N1; j++) { cblas_saxpy( K, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } /* * Rowwise / Forward / Right */ else { /* * B = A1 + A2 * V' */ LAPACKE_slacpy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaUpperLower), M1, K, A1, LDA1, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasTrans, M2, K, N2, (zone), A2, LDA2, V, LDV, (zone), WORK, LDWORK); /* * A2 = A2 - B*T*V -> B = B*T, A2 = A2 - B*V' */ cblas_strmm( CblasColMajor, CblasRight, CblasUpper, (CBLAS_TRANSPOSE)trans, CblasNonUnit, M1, K, (zone), T, LDT, WORK, LDWORK); cblas_sgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M2, N2, K, (mzone), WORK, LDWORK, V, LDV, (zone), A2, LDA2); /* * A1 = A1 - B */ for(j = 0; j < K; j++) { cblas_saxpy( M1, (mzone), &WORK[LDWORK*j], 1, &A1[LDA1*j], 1); } } } else { coreblas_error(3, "Not implemented (RowMajor / Backward / Left or Right)"); return PLASMA_ERR_NOT_SUPPORTED; } } return PLASMA_SUCCESS; }