void F77_ctrsm(int *layout, char *rtlf, char *uplow, char *transp, char *diagn, int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) { int i,j,LDA,LDB; CBLAS_TEST_COMPLEX *A, *B; CBLAS_SIDE side; CBLAS_DIAG diag; CBLAS_UPLO uplo; CBLAS_TRANSPOSE trans; get_uplo_type(uplow,&uplo); get_transpose_type(transp,&trans); get_diag_type(diagn,&diag); get_side_type(rtlf,&side); if (*layout == TEST_ROW_MJR) { if (side == CblasLeft) { LDA = *m+1; A=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); for( i=0; i<*m; i++ ) for( j=0; j<*m; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } else{ LDA = *n+1; A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX)); for( i=0; i<*n; i++ ) for( j=0; j<*n; j++ ) { A[i*LDA+j].real=a[j*(*lda)+i].real; A[i*LDA+j].imag=a[j*(*lda)+i].imag; } } LDB = *n+1; B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX)); for( i=0; i<*m; i++ ) for( j=0; j<*n; j++ ) { B[i*LDB+j].real=b[j*(*ldb)+i].real; B[i*LDB+j].imag=b[j*(*ldb)+i].imag; } cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha, A, LDA, B, LDB ); for( j=0; j<*n; j++ ) for( i=0; i<*m; i++ ) { b[j*(*ldb)+i].real=B[i*LDB+j].real; b[j*(*ldb)+i].imag=B[i*LDB+j].imag; } free(A); free(B); } else if (*layout == TEST_COL_MJR) cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); else cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha, a, *lda, b, *ldb); }
static inline void CORE_cgetrf_reclap_update(const int M, const int column, const int n1, const int n2, PLASMA_Complex32_t *A, const int LDA, int *IPIV, const int thidx, const int thcnt) { static PLASMA_Complex32_t posone = 1.0; static PLASMA_Complex32_t negone = -1.0; PLASMA_Complex32_t *Atop = A + column*LDA; PLASMA_Complex32_t *Atop2 = Atop + n1 *LDA; int coff, ccnt, lm, loff; CORE_cbarrier_thread( thidx, thcnt ); psplit( n2, thidx, thcnt, &coff, &ccnt ); if (ccnt > 0) { CORE_claswap1( ccnt, Atop2 + coff*LDA, LDA, column, n1 + column, IPIV ); /* swap to the right */ cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n1, ccnt, CBLAS_SADDR(posone), Atop + column, LDA, Atop2 + coff*LDA + column, LDA ); } /* __sync_synchronize(); */ /* hopefully we will not need memory fences */ /* need to wait for pivoting and triangular solve to finish */ CORE_cbarrier_thread( thidx, thcnt ); psplit( M, thidx, thcnt, &loff, &lm ); if (thidx == 0) { loff = column + n1; lm -= column + n1; }; cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, lm, n2, n1, CBLAS_SADDR(negone), Atop+loff, LDA, Atop2 + column, LDA, CBLAS_SADDR(posone), Atop2+loff, LDA ); }
inline void trsm( CBLAS_ORDER order, CBLAS_UPLO uplo,CBLAS_TRANSPOSE transA, CBLAS_SIDE side, CBLAS_DIAG unit, int n, int nRHS, std::complex<float> const *A, int lda, std::complex<float> *B, int ldb ) { std::complex<float> alpha(1.0,0); cblas_ctrsm(order, side, uplo, transA, unit,n, nRHS, reinterpret_cast<cblas_float_complex_type const *>(&alpha), reinterpret_cast<cblas_float_complex_type const *>(A), lda, reinterpret_cast<cblas_float_complex_type *>(B), ldb); }
DLLEXPORT int c_qr_solve(int m, int n, int bn, complex r[], complex b[], complex x[], complex work[], int len) { int info = 0; complex* clone_r = new complex[m*n]; memcpy(clone_r, r, m*n*sizeof(complex)); complex* tau = new complex[min(m,n)]; cgeqrf_(&m, &n, clone_r, &m, tau, work, &len, &info); if (info != 0) { delete[] clone_r; delete[] tau; return info; } char side ='L'; char tran = 'C'; complex* clone_b = new complex[m*bn]; memcpy(clone_b, b, m*bn*sizeof(complex)); cunmqr_(&side, &tran, &m, &bn, &n, clone_r, &m, tau, clone_b, &m, work, &len, &info); complex one = {1.0, 0.0}; cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, clone_r, m, clone_b, m); for (int i = 0; i < n; ++i) { for (int j = 0; j < bn; ++j) { x[j * n + i] = clone_b[j * m + i]; } } delete[] clone_r; delete[] tau; delete[] clone_b; return info; }
DLLEXPORT MKL_INT c_qr_solve_factored(MKL_INT m, MKL_INT n, MKL_INT bn, MKL_Complex8 r[], MKL_Complex8 b[], MKL_Complex8 tau[], MKL_Complex8 x[], MKL_Complex8 work[], MKL_INT len) { char side ='L'; char tran = 'C'; MKL_INT info = 0; MKL_Complex8* clone_b = new MKL_Complex8[m*bn]; std::memcpy(clone_b, b, m*bn*sizeof(MKL_Complex8)); cunmqr_(&side, &tran, &m, &bn, &n, r, &m, tau, clone_b, &m, work, &len, &info); MKL_Complex8 one = {1.0f, 0.0f}; cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, r, m, clone_b, m); for (MKL_INT i = 0; i < n; ++i) { for (MKL_INT j = 0; j < bn; ++j) { x[j * n + i] = clone_b[j * m + i]; } } delete[] clone_b; return info; }
DLLEXPORT int c_qr_solve_factored(int m, int n, int bn, complex r[], complex b[], complex tau[], complex x[], complex work[], int len) { char side ='L'; char tran = 'C'; int info = 0; complex* clone_b = new complex[m*bn]; memcpy(clone_b, b, m*bn*sizeof(complex)); cunmqr_(&side, &tran, &m, &bn, &n, r, &m, tau, clone_b, &m, work, &len, &info); complex one = {1.0f, 0.0f}; cblas_ctrsm(CblasColMajor, CblasLeft, CblasUpper, CblasNoTrans, CblasNonUnit, n, bn, &one, r, m, clone_b, m); for (int i = 0; i < n; ++i) { for (int j = 0; j < bn; ++j) { x[j * n + i] = clone_b[j * m + i]; } } delete[] clone_b; return info; }
int CORE_cgessm(int M, int N, int K, int IB, int *IPIV, PLASMA_Complex32_t *L, int LDL, PLASMA_Complex32_t *A, int LDA) { static PLASMA_Complex32_t zone = 1.0; static PLASMA_Complex32_t mzone = -1.0; static int ione = 1; int i, sb; int tmp, tmp2; /* Check input arguments */ if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (K < 0) { coreblas_error(3, "Illegal value of K"); return -3; } if (IB < 0) { coreblas_error(4, "Illegal value of IB"); return -4; } if ((LDL < max(1,M)) && (M > 0)) { coreblas_error(7, "Illegal value of LDL"); return -7; } if ((LDA < max(1,M)) && (M > 0)) { coreblas_error(9, "Illegal value of LDA"); return -9; } /* Quick return */ if ((M == 0) || (N == 0) || (K == 0) || (IB == 0)) return PLASMA_SUCCESS; for(i = 0; i < K; i += IB) { sb = min(IB, K-i); /* * Apply interchanges to columns I*IB+1:IB*( I+1 )+1. */ tmp = i+1; tmp2 = i+sb; LAPACKE_claswp_work(LAPACK_COL_MAJOR, N, A, LDA, tmp, tmp2, IPIV, ione); /* * Compute block row of U. */ cblas_ctrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, sb, N, CBLAS_SADDR(zone), &L[LDL*i+i], LDL, &A[i], LDA ); if (i+sb < M) { /* * Update trailing submatrix. */ cblas_cgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, M-(i+sb), N, sb, CBLAS_SADDR(mzone), &L[LDL*i+(i+sb)], LDL, &A[i], LDA, CBLAS_SADDR(zone), &A[i+sb], LDA ); } } return PLASMA_SUCCESS; }