void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha, CBLAS_TEST_ZOMPLEX *a, int *lda, CBLAS_TEST_ZOMPLEX *b, int *ldb) { int i,j,LDA,LDB; CBLAS_TEST_ZOMPLEX *A, *B; enum CBLAS_SIDE side; enum CBLAS_DIAG diag; enum CBLAS_UPLO uplo; enum CBLAS_TRANSPOSE trans; get_uplo_type(uplow,&uplo); get_transpose_type(transp,&trans); get_diag_type(diagn,&diag); get_side_type(rtlf,&side); if (*order == TEST_ROW_MJR) { if (side == CblasLeft) { LDA = *m+1; A=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) ); for( i=0; i<*m; i++ ) for( j=0; j<*m; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } else{ LDA = *n+1; A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX)); for( i=0; i<*n; i++ ) for( j=0; j<*n; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } LDB = *n+1; B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX)); for( i=0; i<*m; i++ ) for( j=0; j<*n; j++ ) { B[i*LDB+j].real=b[j*(*ldb)+i].real; B[i*LDB+j].imag=b[j*(*ldb)+i].imag; } cblas_ztrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha, A, LDA, B, LDB ); for( j=0; j<*n; j++ ) for( i=0; i<*m; i++ ) { b[j*(*ldb)+i].real=B[i*LDB+j].real; b[j*(*ldb)+i].imag=B[i*LDB+j].imag; } free(A); free(B); } else if (*order == TEST_COL_MJR) cblas_ztrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); else cblas_ztrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); }
inline void trsm( CBLAS_ORDER order, CBLAS_UPLO uplo,CBLAS_TRANSPOSE transA, CBLAS_SIDE side, CBLAS_DIAG unit, int n, int nRHS, std::complex<double> const *A, int lda, std::complex<double> *B, int ldb ) { std::complex<double> alpha(1.0,0); cblas_ztrsm(order, side, uplo, transA, unit,n, nRHS, reinterpret_cast<cblas_double_complex_type const *>(&alpha), reinterpret_cast<cblas_double_complex_type const *>(A), lda, reinterpret_cast<cblas_double_complex_type *>(B), ldb); }
DLLEXPORT int z_qr_solve(int m, int n, int bn, doublecomplex r[], doublecomplex b[], doublecomplex x[], doublecomplex work[], int len) { int info = 0; doublecomplex* clone_r = new doublecomplex[m*n]; memcpy(clone_r, r, m*n*sizeof(doublecomplex)); doublecomplex* tau = new doublecomplex[min(m,n)]; zgeqrf_(&m, &n, clone_r, &m, tau, work, &len, &info); if (info != 0) { delete[] clone_r; delete[] tau; return info; } char side ='L'; char tran = 'C'; doublecomplex* clone_b = new doublecomplex[m*bn]; memcpy(clone_b, b, m*bn*sizeof(doublecomplex)); zunmqr_(&side, &tran, &m, &bn, &n, clone_r, &m, tau, clone_b, &m, work, &len, &info); doublecomplex one = {1.0, 0.0}; cblas_ztrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, clone_r, m, clone_b, m); for (int i = 0; i < n; ++i) { for (int j = 0; j < bn; ++j) { x[j * n + i] = clone_b[j * m + i]; } } delete[] clone_r; delete[] tau; delete[] clone_b; return info; }
DLLEXPORT MKL_INT z_qr_solve_factored(MKL_INT m, MKL_INT n, MKL_INT bn, MKL_Complex16 r[], MKL_Complex16 b[], MKL_Complex16 tau[], MKL_Complex16 x[], MKL_Complex16 work[], MKL_INT len) { char side ='L'; char tran = 'C'; MKL_INT info = 0; MKL_Complex16* clone_b = new MKL_Complex16[m*bn]; std::memcpy(clone_b, b, m*bn*sizeof(MKL_Complex16)); zunmqr_(&side, &tran, &m, &bn, &n, r, &m, tau, clone_b, &m, work, &len, &info); MKL_Complex16 one = {1.0, 0.0}; cblas_ztrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, r, m, clone_b, m); for (MKL_INT i = 0; i < n; ++i) { for (MKL_INT j = 0; j < bn; ++j) { x[j * n + i] = clone_b[j * m + i]; } } delete[] clone_b; return info; }
static int check_transformation(int itype, int uplo, int N, PLASMA_Complex64_t *A1, PLASMA_Complex64_t *A2, int LDA, PLASMA_Complex64_t *B2, int LDB, double eps) { PLASMA_Complex64_t alpha = 1.0; double Anorm, Rnorm, result; int info_transformation; int i, j; char *str; PLASMA_Complex64_t *Residual = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); PLASMA_Complex64_t *Aorig = (PLASMA_Complex64_t *)malloc(N*N*sizeof(PLASMA_Complex64_t)); double *work = (double *)malloc(N*sizeof(double)); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'a', N, N, A1, LDA, Residual, N); LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, lapack_const(uplo), N, N, A2, LDA, Aorig, N); /* Rebuild the symmetry of A2 */ if (uplo == PlasmaLower) { for (j = 0; j < N; j++) for (i = j+1; i < N; i++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } else { for (i = 0; i < N; i++) for (j = i+1; j < N; j++) Aorig[j+i*N] = conj(Aorig[i+j*N]); } if (itype == 1) { if (uplo == PlasmaLower) { str = "L*A2*L'"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "U'*A2*U"; cblas_ztrmm(CblasColMajor, CblasLeft, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrmm(CblasColMajor, CblasRight, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } else { if (uplo == PlasmaLower) { str = "inv(L')*A2*inv(L)"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasLower, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasLower, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } else{ str = "inv(U)*A2*inv(U')"; cblas_ztrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); cblas_ztrsm(CblasColMajor, CblasRight, CblasUpper, CblasConjTrans, CblasNonUnit, N, N, CBLAS_SADDR(alpha), B2, LDB, Aorig, N); } } /* Compute the Residual ( A1 - W ) */ for (i = 0; i < N; i++) for (j = 0; j < N; j++) Residual[j*N+i] = Aorig[j*N+i] - Residual[j*N+i]; Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, Residual, N, work); Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, lapack_const(PlasmaInfNorm), N, N, A1, LDA, work); result = Rnorm / (Anorm * N *eps); printf("============\n"); printf("Checking the global transformation \n"); printf("-- ||A1-%s||_oo/(||A1||_oo.N.eps) = %e \n", str, result ); if (isnan(result) || isinf(result) || (result > 60.0) ) { printf("-- Transformation is suspicious ! \n"); info_transformation = 1; } else { printf("-- Transformation is CORRECT ! \n"); info_transformation = 0; } free(Residual); free(Aorig); free(work); return info_transformation; }