void CORE_dgetrf_rectil_init(void) { int i; for (i = 0; i < AMAX1BUF_SIZE; ++i) CORE_damax1buf[i] = -1.0; sfmin = LAPACKE_dlamch_work('S'); }
static int check_solution(PLASMA_enum uplo, PLASMA_enum trans, int N, int K, double alpha, double *A, int LDA, double *B, int LDB, double beta, double *Cref, double *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; double beta_const; double *work = (double *)malloc(max(N, K)* sizeof(double)); beta_const = -1.0; Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, A, LDA, work); Bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), (trans == PlasmaNoTrans) ? N : K, (trans == PlasmaNoTrans) ? K : N, B, LDB, work); Cinitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); Cplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cplasma, LDC, work); cblas_dsyr2k(CblasColMajor, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, N, K, (alpha), A, LDA, B, LDB, (beta), Cref, LDC); Clapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); cblas_daxpy(LDC*N, (beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference DSYR2K \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_solution(int M, int N, int NRHS, double *A1, int LDA, double *B1, double *B2, int LDB) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm; double alpha, beta; double *work = (double *)malloc(max(M, N)* sizeof(double)); double eps; eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A1, LDA, work); Xnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, B2, LDB, work); Bnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, NRHS, N, (alpha), A1, LDA, B2, LDB, (beta), B1, LDB); if (M >= N) { double *Residual = (double *)malloc(M*NRHS*sizeof(double)); memset((void*)Residual, 0, M*NRHS*sizeof(double)); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, M); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, NRHS, Residual, M, work); free(Residual); } else { double *Residual = (double *)malloc(N*NRHS*sizeof(double)); memset((void*)Residual, 0, N*NRHS*sizeof(double)); cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, NRHS, M, (alpha), A1, LDA, B1, LDB, (beta), Residual, N); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, Residual, N, work); free(Residual); } printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||)_oo.N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if (isnan(Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps)) || (Rnorm / ((Anorm * Xnorm + Bnorm) * N * eps) > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
static int check_solution(PLASMA_enum side, PLASMA_enum uplo, int M, int N, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution, NrowA; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm; double eps; PLASMA_Complex64_t beta_const; double result; double *work = (double *)malloc(max(M, N)* sizeof(double)); beta_const = (PLASMA_Complex64_t)-1.0; NrowA = (side == PlasmaLeft) ? M : N; Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), NrowA, NrowA, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zsymm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, M, N, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n",Rnorm,Anorm,Bnorm,Cinitnorm,Cplasmanorm,Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZSYMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result ); if ( isinf(Clapacknorm) || isinf(Cplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_orthogonality(int M, int N, int LDQ, double *Q) { double alpha, beta; double normQ; int info_ortho; int i; int minMN = min(M, N); double eps; double *work = (double *)malloc(minMN*sizeof(double)); eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; /* Build the idendity matrix USE DLASET?*/ double *Id = (double *) malloc(minMN*minMN*sizeof(double)); memset((void*)Id, 0, minMN*minMN*sizeof(double)); for (i = 0; i < minMN; i++) Id[i*minMN+i] = (double)1.0; /* Perform Id - Q'Q */ if (M >= N) cblas_dsyrk(CblasColMajor, CblasUpper, CblasTrans, N, M, alpha, Q, LDQ, beta, Id, N); else cblas_dsyrk(CblasColMajor, CblasUpper, CblasNoTrans, M, N, alpha, Q, LDQ, beta, Id, M); normQ = LAPACKE_dlansy_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), 'u', minMN, Id, minMN, work); printf("============\n"); printf("Checking the orthogonality of Q \n"); printf("||Id-Q'*Q||_oo / (N*eps) = %e \n",normQ/(minMN*eps)); if ( isnan(normQ / (minMN * eps)) || (normQ / (minMN * eps) > 10.0) ) { printf("-- Orthogonality is suspicious ! \n"); info_ortho=1; } else { printf("-- Orthogonality is CORRECT ! \n"); info_ortho=0; } free(work); free(Id); return info_ortho; }
int check_solution(int N, int NRHS, PLASMA_Complex64_t *A1, int LDA, PLASMA_Complex64_t *B1, PLASMA_Complex64_t *B2, int LDB) { int info_solution; double Rnorm, Anorm, Xnorm, Bnorm; PLASMA_Complex64_t alpha, beta; double *work = (double *)malloc(N*sizeof(double)); double eps; eps = LAPACKE_dlamch_work('e'); alpha = 1.0; beta = -1.0; Xnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B2, LDB, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, NRHS, N, CBLAS_SADDR(alpha), A1, LDA, B2, LDB, CBLAS_SADDR(beta), B1, LDB); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, NRHS, B1, LDB, work); printf("============\n"); printf("Checking the Residual of the solution \n"); printf("-- ||Ax-B||_oo/((||A||_oo||x||_oo+||B||_oo).N.eps) = %e \n",Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)); if ( isnan(Rnorm/((Anorm*Xnorm+Bnorm)*N*eps)) || (Rnorm/((Anorm*Xnorm+Bnorm)*N*eps) > 10.0) ){ printf("-- The solution is suspicious ! \n"); info_solution = 1; } else{ printf("-- The solution is CORRECT ! \n"); info_solution = 0; } free(work); return info_solution; }
int testing_dsyr2k(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYR2K", "alpha beta M N LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - N : number of columns and rows of matrix C and number of row of matrix A and B\n" " - K : number of columns of matrix A and B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } double alpha = (double) atol(argv[0]); double beta = (double) atol(argv[1]); int N = atoi(argv[2]); int K = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int NKmax = max(N, K); int NminusOne = N - 1; double eps; int info_solution; int info, u, t; size_t LDAxK = LDA*NKmax; size_t LDBxK = LDB*NKmax; size_t LDCxN = LDC*N; double *A = (double *)malloc(LDAxK*sizeof(double)); double *B = (double *)malloc(LDBxK*sizeof(double)); double *C = (double *)malloc(LDCxN*sizeof(double)); double *Cinit = (double *)malloc(LDCxN*sizeof(double)); double *Cfinal = (double *)malloc(LDCxN*sizeof(double)); double *WORK = (double *)malloc(2*LDC*sizeof(double)); double *D = (double *) malloc(LDC *sizeof(double)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Cinit) || (!Cfinal) || (!D) ){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA DSYR2K ROUTINE ------- \n"); printf(" Size of the Matrix C %d by %d\n", N, K); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING DSYR2K */ /* Initialize A,B */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_dlarnv_work(IONE, ISEED, LDBxK, B); /* Initialize C */ LAPACKE_dlarnv_work(IONE, ISEED, LDC, D); dlagsy(&N, &NminusOne, D, C, &LDC, ISEED, WORK, &info); free(D); free(WORK); for (u=0; u<2; u++) { for (t=0; t<2; t++) { memcpy(Cinit, C, LDCxN*sizeof(double)); memcpy(Cfinal, C, LDCxN*sizeof(double)); /* PLASMA DSYR2K */ PLASMA_dsyr2k(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(uplo[u], trans[t], N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING DSYR2K (%5s, %s) ........... PASSED !\n", uplostr[u], transstr[t]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING DSYR2K (%5s, %s) ... FAILED !\n", uplostr[u], transstr[t]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int testing_dtrmm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 5 ) { USAGE("TRMM", "alpha M N LDA LDB", " - alpha : alpha coefficient\n" " - M : number of rows of matrices B\n" " - N : number of columns of matrices B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n"); return -1; } double alpha = (double) atol(argv[0]); int M = atoi(argv[1]); int N = atoi(argv[2]); int LDA = atoi(argv[3]); int LDB = atoi(argv[4]); double eps; int info_solution; int s, u, t, d, i; int LDAxM = LDA*max(M,N); int LDBxN = LDB*max(M,N); double *A = (double *)malloc(LDAxM*sizeof(double)); double *B = (double *)malloc(LDBxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *Bfinal = (double *)malloc(LDBxN*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A) || (!B) || (!Binit) || (!Bfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA DTRMM ROUTINE ------- \n"); printf(" Size of the Matrix B : %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING DTRMM */ /* Initialize A, B, C */ LAPACKE_dlarnv_work(IONE, ISEED, LDAxM, A); LAPACKE_dlarnv_work(IONE, ISEED, LDBxN, B); for(i=0;i<max(M,N);i++) A[LDA*i+i] = A[LDA*i+i] + 2.0; for (s=0; s<2; s++) { for (u=0; u<2; u++) { #ifdef COMPLEX for (t=0; t<3; t++) { #else for (t=0; t<2; t++) { #endif for (d=0; d<2; d++) { memcpy(Binit, B, LDBxN*sizeof(double)); memcpy(Bfinal, B, LDBxN*sizeof(double)); /* PLASMA DTRMM */ PLASMA_dtrmm(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Bfinal, LDB); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], trans[t], diag[d], M, N, alpha, A, LDA, Binit, Bfinal, LDB); printf("***************************************************\n"); if (info_solution == 0) { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ...... PASSED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } else { printf(" ---- TESTING DTRMM (%s, %s, %s, %s) ... FAILED !\n", sidestr[s], uplostr[u], transstr[t], diagstr[d]); } printf("***************************************************\n"); } } } } free(A); free(B); free(Binit); free(Bfinal); return 0; } /*-------------------------------------------------------------- * Check the solution */ static int check_solution(PLASMA_enum side, PLASMA_enum uplo, PLASMA_enum trans, PLASMA_enum diag, int M, int N, double alpha, double *A, int LDA, double *Bref, double *Bplasma, int LDB) { int info_solution; double Anorm, Binitnorm, Bplasmanorm, Blapacknorm, Rnorm, result; double eps; double mzone = (double)-1.0; double *work = (double *)malloc(max(M, N)* sizeof(double)); int Am, An; if (side == PlasmaLeft) { Am = M; An = M; } else { Am = N; An = N; } Anorm = LAPACKE_dlantr_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), lapack_const(uplo), lapack_const(diag), Am, An, A, LDA, work); Binitnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); Bplasmanorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bplasma, LDB, work); cblas_dtrmm(CblasColMajor, (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, (CBLAS_DIAG)diag, M, N, (alpha), A, LDA, Bref, LDB); Blapacknorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); cblas_daxpy(LDB * N, (mzone), Bplasma, 1, Bref, 1); Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Bref, LDB, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Binitnorm %e, Bplasmanorm %e, Blapacknorm %e\n", Rnorm, Anorm, Binitnorm, Bplasmanorm, Blapacknorm); result = Rnorm / ((Anorm + Blapacknorm) * max(M,N) * eps); printf("============\n"); printf("Checking the norm of the difference against reference DTRMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo).N.eps) = %e \n", result); if ( isinf(Blapacknorm) || isinf(Bplasmanorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int check_factorization(int M, int N, double *A1, double *A2, int LDA, double *Q) { double Anorm, Rnorm; double alpha, beta; int info_factorization; int i,j; double eps; eps = LAPACKE_dlamch_work('e'); double *Ql = (double *)malloc(M*N*sizeof(double)); double *Residual = (double *)malloc(M*N*sizeof(double)); double *work = (double *)malloc(max(M,N)*sizeof(double)); alpha=1.0; beta=0.0; if (M >= N) { /* Extract the R */ double *R = (double *)malloc(N*N*sizeof(double)); memset((void*)R, 0, N*N*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'u', M, N, A2, LDA, R, N); /* Perform Ql=Q*R */ memset((void*)Ql, 0, M*N*sizeof(double)); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N, (alpha), Q, LDA, R, N, (beta), Ql, M); free(R); } else { /* Extract the L */ double *L = (double *)malloc(M*M*sizeof(double)); memset((void*)L, 0, M*M*sizeof(double)); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR,'l', M, N, A2, LDA, L, M); /* Perform Ql=LQ */ memset((void*)Ql, 0, M*N*sizeof(double)); cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M, (alpha), L, M, Q, LDA, (beta), Ql, M); free(L); } /* Compute the Residual */ for (i = 0; i < M; i++) for (j = 0 ; j < N; j++) Residual[j*M+i] = A1[j*LDA+i]-Ql[j*M+i]; Rnorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Residual, M, work); Anorm = LAPACKE_dlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, A2, LDA, work); if (M >= N) { printf("============\n"); printf("Checking the QR Factorization \n"); printf("-- ||A-QR||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } else { printf("============\n"); printf("Checking the LQ Factorization \n"); printf("-- ||A-LQ||_oo/(||A||_oo.N.eps) = %e \n",Rnorm/(Anorm*N*eps)); } if (isnan(Rnorm / (Anorm * N *eps)) || (Rnorm / (Anorm * N * eps) > 10.0) ) { printf("-- Factorization is suspicious ! \n"); info_factorization = 1; } else { printf("-- Factorization is CORRECT ! \n"); info_factorization = 0; } free(work); free(Ql); free(Residual); return info_factorization; }
int testing_dsposv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 4){ USAGE("CPOSV", "N LDA NRHS LDB", " - N : the size of the matrix\n" " - LDA : leading dimension of the matrix A\n" " - NRHS : number of RHS\n" " - LDB : leading dimension of the RHS B\n"); return -1; } int N = atoi(argv[0]); int LDA = atoi(argv[1]); int NRHS = atoi(argv[2]); int LDB = atoi(argv[3]); int ITER; double eps; int uplo; int info; int info_solution = 0; /*, info_factorization;*/ int i,j; int NminusOne = N-1; int LDBxNRHS = LDB*NRHS; double *A1 = (double *)malloc(LDA*N *sizeof(double)); double *A2 = (double *)malloc(LDA*N *sizeof(double)); double *B1 = (double *)malloc(LDB*NRHS*sizeof(double)); double *B2 = (double *)malloc(LDB*NRHS*sizeof(double)); double *WORK = (double *)malloc(2*LDA *sizeof(double)); double *D = (double *)malloc(LDA*sizeof(double)); /* Check if unable to allocate memory */ if ( (!A1) || (!A2) || (!B1) || (!B2) ){ printf("Out of Memory \n "); exit(0); } eps = LAPACKE_dlamch_work('e'); /*------------------------------------------------------------- * TESTING DSPOSV */ /* Initialize A1 and A2 for Symmetric Positif Matrix (Hessenberg in the complex case) */ LAPACKE_dlarnv_work(IONE, ISEED, LDA, D); dlagsy(&N, &NminusOne, D, A1, &LDA, ISEED, WORK, &info); free(D); for ( i = 0; i < N; i++) for ( j = 0; j < N; j++) A2[LDA*j+i] = A1[LDA*j+i]; for ( i = 0; i < N; i++){ A1[LDA*i+i] = A1[LDA*i+i] + N ; A2[LDA*i+i] = A1[LDA*i+i]; } /* Initialize B1 and B2 */ LAPACKE_dlarnv_work(IONE, ISEED, LDBxNRHS, B1); for ( i = 0; i < N; i++) for ( j = 0; j < NRHS; j++) B2[LDB*j+i] = B1[LDB*j+i]; printf("\n"); printf("------ TESTS FOR PLASMA DSPOSV ROUTINE ------ \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n", eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /* PLASMA DSPOSV */ uplo = PlasmaLower; info = PLASMA_dsposv(uplo, N, NRHS, A2, LDA, B1, LDB, B2, LDB, &ITER); if (info != PLASMA_SUCCESS ) { printf("PLASMA_dsposv is not completed: info = %d\n", info); info_solution = 1; } else { printf(" Solution obtained with %d iterations\n", ITER); /* Check the factorization and the solution */ info_solution = check_solution(N, NRHS, A1, LDA, B1, B2, LDB, eps); } if (info_solution == 0){ printf("***************************************************\n"); printf(" ---- TESTING DSPOSV ..................... PASSED !\n"); printf("***************************************************\n"); } else{ printf("***************************************************\n"); printf(" - TESTING DSPOSV .. FAILED !\n"); printf("***************************************************\n"); } free(A1); free(A2); free(B1); free(B2); free(WORK); return 0; }
int testing_zsymm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 7 ){ USAGE("SYMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient \n" " - beta : beta coefficient \n" " - M : number of rows of matrices A and C \n" " - N : number of columns of matrices B and C \n" " - LDA : leading dimension of matrix A \n" " - LDB : leading dimension of matrix B \n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); PLASMA_Complex64_t beta = (PLASMA_Complex64_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int LDA = atoi(argv[4]); int LDB = atoi(argv[5]); int LDC = atoi(argv[6]); int MNmax = max(M, N); double eps; int info_solution; int i, j, s, u; int LDAxM = LDA*MNmax; int LDBxN = LDB*N; int LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxM*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZSYMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZSYMM */ /* Initialize A */ PLASMA_zplgsy( (double)0., MNmax, A, LDA, 51 ); /* Initialize B */ LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B); /* Initialize C */ LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C); for (s=0; s<2; s++) { for (u=0; u<2; u++) { /* Initialize Cinit / Cfinal */ for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA ZSYMM */ PLASMA_zsymm(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(side[s], uplo[u], M, N, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZSYMM (%5s, %5s) ....... PASSED !\n", sidestr[s], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZSYMM (%s, %s) ... FAILED !\n", sidestr[s], uplostr[u]); printf("************************************************\n"); } } } free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
int testing_zhegst(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGST", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } double eps = LAPACKE_dlamch_work('e'); int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int info_transformation, info_factorization; int i, u; int LDAxN = LDA*N; int LDBxN = LDB*N; PLASMA_Complex64_t *A1 = (PLASMA_Complex64_t *)malloc(LDAxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *A2 = (PLASMA_Complex64_t *)malloc(LDAxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B1 = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B2 = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Ainit = (PLASMA_Complex64_t *)malloc(LDAxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Binit = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /*---------------------------------------------------------- * TESTING ZHEGST */ /* Initialize A1 and A2 */ PLASMA_zplghe(0., N, A1, LDA, 5198); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and B2 */ PLASMA_zplghe((double)N, N, B1, LDB, 4231); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA ZHEGST ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrices A and B are randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING ZHEGST */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { memcpy(A2, Ainit, LDAxN*sizeof(PLASMA_Complex64_t)); memcpy(B2, Binit, LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_zpotrf(uplo[u], N, B2, LDB); PLASMA_zhegst(itype[i], uplo[u], N, A2, LDA, B2, LDB); /* Check the Cholesky factorization and the transformation */ info_factorization = check_factorization(N, B1, B2, LDB, uplo[u], eps); info_transformation = check_transformation(itype[i], uplo[u], N, A1, A2, LDA, B2, LDB, eps); if ( (info_transformation == 0) && (info_factorization == 0) ) { printf("***************************************************\n"); printf(" ---- TESTING ZHEGST (%s, %s) ....... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZHEGST (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } free(A1); free(A2); free(B1); free(B2); free(Ainit); free(Binit); return 0; }
int testing_zgemm(int argc, char **argv) { /* Check for number of arguments*/ if ( argc != 8) { USAGE("GEMM", "alpha beta M N K LDA LDB LDC", " - alpha : alpha coefficient\n" " - beta : beta coefficient\n" " - M : number of rows of matrices A and C\n" " - N : number of columns of matrices B and C\n" " - K : number of columns of matrix A / number of rows of matrix B\n" " - LDA : leading dimension of matrix A\n" " - LDB : leading dimension of matrix B\n" " - LDC : leading dimension of matrix C\n"); return -1; } PLASMA_Complex64_t alpha = (PLASMA_Complex64_t) atol(argv[0]); PLASMA_Complex64_t beta = (PLASMA_Complex64_t) atol(argv[1]); int M = atoi(argv[2]); int N = atoi(argv[3]); int K = atoi(argv[4]); int LDA = atoi(argv[5]); int LDB = atoi(argv[6]); int LDC = atoi(argv[7]); double eps; int info_solution; int i, j, ta, tb; int LDAxK = LDA*max(M,K); int LDBxN = LDB*max(K,N); int LDCxN = LDC*N; PLASMA_Complex64_t *A = (PLASMA_Complex64_t *)malloc(LDAxK*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *B = (PLASMA_Complex64_t *)malloc(LDBxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *C = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cinit = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Cfinal = (PLASMA_Complex64_t *)malloc(LDCxN*sizeof(PLASMA_Complex64_t)); /* Check if unable to allocate memory */ if ((!A)||(!B)||(!Cinit)||(!Cfinal)){ printf("Out of Memory \n "); return -2; } eps = LAPACKE_dlamch_work('e'); printf("\n"); printf("------ TESTS FOR PLASMA ZGEMM ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", M, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 10.\n"); /*---------------------------------------------------------- * TESTING ZGEMM */ /* Initialize A, B, C */ LAPACKE_zlarnv_work(IONE, ISEED, LDAxK, A); LAPACKE_zlarnv_work(IONE, ISEED, LDBxN, B); LAPACKE_zlarnv_work(IONE, ISEED, LDCxN, C); #ifdef COMPLEX for (ta=0; ta<3; ta++) { for (tb=0; tb<3; tb++) { #else for (ta=0; ta<2; ta++) { for (tb=0; tb<2; tb++) { #endif for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cinit[LDC*j+i] = C[LDC*j+i]; for ( i = 0; i < M; i++) for ( j = 0; j < N; j++) Cfinal[LDC*j+i] = C[LDC*j+i]; /* PLASMA ZGEMM */ PLASMA_zgemm(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cfinal, LDC); /* Check the solution */ info_solution = check_solution(trans[ta], trans[tb], M, N, K, alpha, A, LDA, B, LDB, beta, Cinit, Cfinal, LDC); if (info_solution == 0) { printf("***************************************************\n"); printf(" ---- TESTING ZGEMM (%s, %s) ............... PASSED !\n", transstr[ta], transstr[tb]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING ZGEMM (%s, %s) ... FAILED !\n", transstr[ta], transstr[tb]); printf("************************************************\n"); } } } #ifdef _UNUSED_ }} #endif free(A); free(B); free(C); free(Cinit); free(Cfinal); return 0; }
static int check_solution(PLASMA_enum transA, PLASMA_enum transB, int M, int N, int K, PLASMA_Complex64_t alpha, PLASMA_Complex64_t *A, int LDA, PLASMA_Complex64_t *B, int LDB, PLASMA_Complex64_t beta, PLASMA_Complex64_t *Cref, PLASMA_Complex64_t *Cplasma, int LDC) { int info_solution; double Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm, Rnorm, result; double eps; PLASMA_Complex64_t beta_const; double *work = (double *)malloc(max(K,max(M, N))* sizeof(double)); int Am, An, Bm, Bn; beta_const = -1.0; if (transA == PlasmaNoTrans) { Am = M; An = K; } else { Am = K; An = M; } if (transB == PlasmaNoTrans) { Bm = K; Bn = N; } else { Bm = N; Bn = K; } Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Am, An, A, LDA, work); Bnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), Bm, Bn, B, LDB, work); Cinitnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); Cplasmanorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cplasma, LDC, work); cblas_zgemm(CblasColMajor, (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, M, N, K, CBLAS_SADDR(alpha), A, LDA, B, LDB, CBLAS_SADDR(beta), Cref, LDC); Clapacknorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); cblas_zaxpy(LDC * N, CBLAS_SADDR(beta_const), Cplasma, 1, Cref, 1); Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), M, N, Cref, LDC, work); eps = LAPACKE_dlamch_work('e'); printf("Rnorm %e, Anorm %e, Bnorm %e, Cinitnorm %e, Cplasmanorm %e, Clapacknorm %e\n", Rnorm, Anorm, Bnorm, Cinitnorm, Cplasmanorm, Clapacknorm); result = Rnorm / ((Anorm + Bnorm + Cinitnorm) * N * eps); printf("============\n"); printf("Checking the norm of the difference against reference ZGEMM \n"); printf("-- ||Cplasma - Clapack||_oo/((||A||_oo+||B||_oo+||C||_oo).N.eps) = %e \n", result); if ( isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) { printf("-- The solution is suspicious ! \n"); info_solution = 1; } else { printf("-- The solution is CORRECT ! \n"); info_solution= 0 ; } free(work); return info_solution; }
int testing_dsygv(int argc, char **argv) { /* Check for number of arguments*/ if (argc != 3) { USAGE("HEGV", "N LDA LDB", " - N : size of the matrices A and B\n" " - LDA : leading dimension of the matrix A\n" " - LDB : leading dimension of the matrix B\n"); return -1; } double eps = LAPACKE_dlamch_work('e'); PLASMA_enum vec = PlasmaNoVec; int N = atoi(argv[0]); int LDA = atoi(argv[1]); int LDB = atoi(argv[2]); int LDQ = LDA; int LDAxN = LDA*N; int LDBxN = LDB*N; int LDQxN = LDQ*N; int info_ortho = 0; int info_solution = 0; int info_reduction = 0; int i, u; double *A1 = (double *)malloc(LDAxN*sizeof(double)); double *A2 = (double *)malloc(LDAxN*sizeof(double)); double *B1 = (double *)malloc(LDBxN*sizeof(double)); double *B2 = (double *)malloc(LDBxN*sizeof(double)); double *Q = (double *)malloc(LDQxN*sizeof(double)); double *Ainit = (double *)malloc(LDAxN*sizeof(double)); double *Binit = (double *)malloc(LDBxN*sizeof(double)); double *W1 = (double *)malloc(N*sizeof(double)); double *W2 = (double *)malloc(N*sizeof(double)); double *work = (double *)malloc(3*N* sizeof(double)); PLASMA_desc *T; /* Check if unable to allocate memory */ if ((!A1)||(!A2)||(!B1)||(!B2)||(!Q)||(!Ainit)||(!Binit)){ printf("Out of Memory \n "); return -2; } /* PLASMA_Disable(PLASMA_AUTOTUNING); PLASMA_Set(PLASMA_TILE_SIZE, 120); PLASMA_Set(PLASMA_INNER_BLOCK_SIZE, 20); */ PLASMA_Enable(PLASMA_WARNINGS); PLASMA_Enable(PLASMA_ERRORS); PLASMA_Alloc_Workspace_dsygv(N, N, &T); /*---------------------------------------------------------- * TESTING DSYGV */ /* Initialize A1 and Ainit */ PLASMA_dplgsy(0., N, A1, LDA, 5198); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A1, LDA, Ainit, LDA); /* Initialize B1 and Binit */ PLASMA_dplgsy((double)N, N, B1, LDB, 4321 ); LAPACKE_dlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, B1, LDB, Binit, LDB); printf("\n"); printf("------ TESTS FOR PLASMA DSYGV ROUTINE ------- \n"); printf(" Size of the Matrix %d by %d\n", N, N); printf("\n"); printf(" The matrix A is randomly generated for each test.\n"); printf("============\n"); printf(" The relative machine precision (eps) is to be %e \n",eps); printf(" Computational tests pass if scaled residuals are less than 60.\n"); /*---------------------------------------------------------- * TESTING DSYGV */ for (i=0; i<3; i++) { for (u=0; u<2; u++) { LAPACKE_dlaset_work(LAPACK_COL_MAJOR, 'A', LDA, N, 0., 1., Q, LDA); memcpy(A2, Ainit, LDAxN*sizeof(double)); memcpy(B2, Binit, LDBxN*sizeof(double)); PLASMA_dsygv(itype[i], vec, uplo[u], N, A2, LDA, B2, LDB, W2, T, Q, LDQ); /* Check the orthogonality, reduction and the eigen solutions */ if (vec == PlasmaVec) info_ortho = check_orthogonality(N, N, Q, LDA, eps); /* * WARNING: For now, Q is associated to Band tridiagonal reduction and * not to the final tridiagonal reduction, so we can not call the check */ if (0) info_reduction = check_reduction(itype[i], uplo[u], N, 1, A1, A2, LDA, B2, LDB, Q, eps); memcpy(A1, Ainit, LDAxN*sizeof(double)); memcpy(B1, Binit, LDBxN*sizeof(double)); LAPACKE_dsygv( LAPACK_COL_MAJOR, itype[i], lapack_const(vec), lapack_const(uplo[u]), N, A1, LDA, B1, LDB, W1); /*info_solution = check_solution(N, N, N, A1, LDA, B1, B2, LDB, eps);*/ info_solution = check_solution(N, W1, W2, eps); if ( (info_ortho == 0) & (info_reduction == 0) & (info_solution == 0)) { printf("***************************************************\n"); printf(" ---- TESTING DSYGV (%s, %s) ...................... PASSED !\n", itypestr[i], uplostr[u]); printf("***************************************************\n"); } else { printf("************************************************\n"); printf(" - TESTING DSYGV (%s, %s) ... FAILED !\n", itypestr[i], uplostr[u]); printf("************************************************\n"); } } } PLASMA_Dealloc_Handle_Tile(&T); free(A1); free(A2); free(B1); free(B2); free(Q); free(Ainit); free(Binit); free(W1); free(W2); free(work); return 0; }
/***************************************************************************//** * * @ingroup CORE_PLASMA_Complex64_t * * CORE_zgetf2_nopiv computes an LU factorization of a general diagonal * dominant M-by-N matrix A witout no pivoting and no blocking. It is the * internal function called by CORE_zgetrf_nopiv(). * * The factorization has the form * A = L * U * where L is lower triangular with unit * diagonal elements (lower trapezoidal if m > n), and U is upper * triangular (upper trapezoidal if m < n). * * This is the right-looking Level 3 BLAS version of the algorithm. * ******************************************************************************* * * @param[in] M * The number of rows of the matrix A. M >= 0. * * @param[in] N * The number of columns of the matrix A. N >= 0. * * @param[in,out] A * On entry, the M-by-N matrix to be factored. * On exit, the factors L and U from the factorization * A = P*L*U; the unit diagonal elements of L are not stored. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). * ******************************************************************************* * * @return * \retval PLASMA_SUCCESS successful exit * \retval <0 if INFO = -k, the k-th argument had an illegal value * \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * ******************************************************************************/ int CORE_zgetf2_nopiv(int M, int N, PLASMA_Complex64_t *A, int LDA) { PLASMA_Complex64_t mzone = (PLASMA_Complex64_t)-1.0; PLASMA_Complex64_t alpha; double sfmin; int i, j, k; int info; /* Check input arguments */ info = 0; if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if ((LDA < max(1,M)) && (M > 0)) { coreblas_error(5, "Illegal value of LDA"); return -5; } /* Quick return */ if ( (M == 0) || (N == 0) ) return PLASMA_SUCCESS; sfmin = LAPACKE_dlamch_work('S'); k = min(M, N); for(i=0 ; i < k; i++) { alpha = A[i*LDA+i]; if ( alpha != (PLASMA_Complex64_t)0.0 ) { /* Compute elements J+1:M of J-th column. */ if (i < M) { if ( cabs(alpha) > sfmin ) { alpha = 1.0 / alpha; cblas_zscal( M-i-1, CBLAS_SADDR(alpha), &(A[i*LDA+i+1]), 1); } else { for(j=i+1; j<M; j++) A[LDA*i+j] = A[LDA*i+j] / alpha; } } } else if ( info == 0 ) { info = i; goto end; } if ( i < k ) { /* Update trailing submatrix */ cblas_zgeru(CblasColMajor, M-i-1, N-i-1, CBLAS_SADDR(mzone), &A[LDA* i +i+1], 1, &A[LDA*(i+1)+i ], LDA, &A[LDA*(i+1)+i+1], LDA); } } end: return info; }