/** ZGBTRF computes an LU factorization of a complex m-by-n band matrix A using partial pivoting with row interchanges. * * This routine is functionally equivalent to LAPACK's zgbtrf. * For details on its interface, see * http://www.netlib.org/lapack/explore-html/dc/dcb/zgbtrf_8f.html * */ void RELAPACK_zgbtrf( const int *m, const int *n, const int *kl, const int *ku, double *Ab, const int *ldAb, int *ipiv, int *info ) { // Check arguments *info = 0; if (*m < 0) *info = -1; else if (*n < 0) *info = -2; else if (*kl < 0) *info = -3; else if (*ku < 0) *info = -4; else if (*ldAb < 2 * *kl + *ku + 1) *info = -6; if (*info) { const int minfo = -*info; LAPACK(xerbla)("ZGBTRF", &minfo); return; } // Constant const double ZERO[] = { 0., 0. }; // Result upper band width const int kv = *ku + *kl; // Unskew A const int ldA[] = { *ldAb - 1 }; double *const A = Ab + 2 * kv; // Zero upper diagonal fill-in elements int i, j; for (j = 0; j < *n; j++) { double *const A_j = A + 2 * *ldA * j; for (i = MAX(0, j - kv); i < j - *ku; i++) A_j[2 * i] = A_j[2 * i + 1] = 0.; } // Allocate work space const int n1 = ZREC_SPLIT(*n); const int mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; const int nWorkl = (kv > n1) ? n1 : kv; const int mWorku = (*kl > n1) ? n1 : *kl; const int nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double)); double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double)); LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); LAPACK(zlaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku); // Recursive kernel RELAPACK_zgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info); // Free work space free(Workl); free(Worku); }
/** dgetrf's recursive compute kernel */ static void RELAPACK_dgetrf_rec( const int *m, const int *n, double *A, const int *ldA, int *ipiv, int *info ) { if (*n <= MAX(CROSSOVER_DGETRF, 1)) { // Unblocked LAPACK(dgetf2)(m, n, A, ldA, ipiv, info); return; } // Constants const double ONE[] = { 1. }; const double MONE[] = { -1. }; const int iONE[] = { 1 }; // Splitting const int n1 = DREC_SPLIT(*n); const int n2 = *n - n1; const int m2 = *m - n1; // A_L A_R double *const A_L = A; double *const A_R = A + *ldA * n1; // A_TL A_TR // A_BL A_BR double *const A_TL = A; double *const A_TR = A + *ldA * n1; double *const A_BL = A + n1; double *const A_BR = A + *ldA * n1 + n1; // ipiv_T // ipiv_B int *const ipiv_T = ipiv; int *const ipiv_B = ipiv + n1; // recursion(A_L, ipiv_T) RELAPACK_dgetrf_rec(m, &n1, A_L, ldA, ipiv_T, info); // apply pivots to A_R LAPACK(dlaswp)(&n2, A_R, ldA, iONE, &n1, ipiv_T, iONE); // A_TR = A_TL \ A_TR BLAS(dtrsm)("L", "L", "N", "U", &n1, &n2, ONE, A_TL, ldA, A_TR, ldA); // A_BR = A_BR - A_BL * A_TR BLAS(dgemm)("N", "N", &m2, &n2, &n1, MONE, A_BL, ldA, A_TR, ldA, ONE, A_BR, ldA); // recursion(A_BR, ipiv_B) RELAPACK_dgetrf_rec(&m2, &n2, A_BR, ldA, ipiv_B, info); if (*info) *info += n1; // apply pivots to A_BL LAPACK(dlaswp)(&n1, A_BL, ldA, iONE, &n2, ipiv_B, iONE); // shift pivots int i; for (i = 0; i < n2; i++) ipiv_B[i] += n1; }
/** CGEMMT computes a matrix-matrix product with general matrices but updates * only the upper or lower triangular part of the result matrix. * * This routine performs the same operation as the BLAS routine * cgemm(transA, transB, n, n, k, alpha, A, ldA, B, ldB, beta, C, ldC) * but only updates the triangular part of C specified by uplo: * If (*uplo == 'L'), only the lower triangular part of C is updated, * otherwise the upper triangular part is updated. * */ void RELAPACK_cgemmt( const char *uplo, const char *transA, const char *transB, const int *n, const int *k, const float *alpha, const float *A, const int *ldA, const float *B, const int *ldB, const float *beta, float *C, const int *ldC ) { #if HAVE_XGEMMT BLAS(cgemmt)(uplo, transA, transB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); return; #else // Check arguments const int lower = LAPACK(lsame)(uplo, "L"); const int upper = LAPACK(lsame)(uplo, "U"); const int notransA = LAPACK(lsame)(transA, "N"); const int tranA = LAPACK(lsame)(transA, "T"); const int ctransA = LAPACK(lsame)(transA, "C"); const int notransB = LAPACK(lsame)(transB, "N"); const int tranB = LAPACK(lsame)(transB, "T"); const int ctransB = LAPACK(lsame)(transB, "C"); int info = 0; if (!lower && !upper) info = 1; else if (!tranA && !ctransA && !notransA) info = 2; else if (!tranB && !ctransB && !notransB) info = 3; else if (*n < 0) info = 4; else if (*k < 0) info = 5; else if (*ldA < MAX(1, notransA ? *n : *k)) info = 8; else if (*ldB < MAX(1, notransB ? *k : *n)) info = 10; else if (*ldC < MAX(1, *n)) info = 13; if (info) { LAPACK(xerbla)("CGEMMT", &info); return; } // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; const char cleantransA = notransA ? 'N' : (tranA ? 'T' : 'C'); const char cleantransB = notransB ? 'N' : (tranB ? 'T' : 'C'); // Recursive kernel RELAPACK_cgemmt_rec(&cleanuplo, &cleantransA, &cleantransB, n, k, alpha, A, ldA, B, ldB, beta, C, ldC); #endif }
inline void LU<double> ( int m, int n, double* A, int lda, int* pivot ) { int info; LAPACK(dgetrf)( &m, &n, A, &lda, pivot, &info ); }
inline void LU<scomplex> ( int m, int n, scomplex* A, int lda, int* pivot ) { int info; LAPACK(cgetrf)( &m, &n, A, &lda, pivot, &info ); }
inline void LU<float> ( int m, int n, float* A, int lda, int* pivot ) { int info; LAPACK(sgetrf)( &m, &n, A, &lda, pivot, &info ); }
inline void InvertLU<float> ( int m, float* A, int lda, const int* pivot, float* work, int lwork ) { int info; LAPACK(sgetri)( &m, A, &lda, pivot, work, &lwork, &info ); }
inline void InvertLU<double> ( int m, double* A, int lda, const int* pivot, double* work, int lwork ) { int info; LAPACK(dgetri)( &m, A, &lda, pivot, work, &lwork, &info ); }
inline void InvertLU<dcomplex> ( int m, dcomplex* A, int lda, const int* pivot, dcomplex* work, int lwork ) { int info; LAPACK(zgetri)( &m, A, &lda, pivot, work, &lwork, &info ); }
void DivideAndConquerSVD ( int m, int n, dcomplex* A, int lda, double* s, dcomplex* U, int ldu, dcomplex* VAdj, int ldva ) { #ifndef RELEASE PushCallStack("lapack::DivideAndConquerSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobz='S'; int lwork=-1, info; dcomplex dummyWork; const int k = std::min(m,n); const int K = std::max(m,n); const int lrwork = k*std::max(5*k+7,2*K+2*k+1); std::vector<double> rwork(lrwork); std::vector<int> iwork(8*k); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork, &rwork[0], &iwork[0], &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork, &rwork[0], &iwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zgesdd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void DivideAndConquerSVD ( int m, int n, float* A, int lda, float* s, float* U, int ldu, float* VTrans, int ldvt ) { #ifndef RELEASE PushCallStack("lapack::DivideAndConquerSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobz='S'; int lwork=-1, info; float dummyWork; const int k = std::min(m,n); std::vector<int> iwork(8*k); LAPACK(sgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &dummyWork, &lwork, &iwork[0], &info ); lwork = dummyWork; std::vector<float> work(lwork); LAPACK(sgesdd) ( &jobz, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &work[0], &lwork, &iwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("sgesdd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void QRSVD ( int m, int n, scomplex* A, int lda, float* s, scomplex* U, int ldu, scomplex* VAdj, int ldva ) { #ifndef RELEASE PushCallStack("lapack::QRSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='S', jobva='S'; int lwork=-1, info; const int k = std::min(m,n); std::vector<float> rwork(5*k); scomplex dummyWork; LAPACK(cgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &dummyWork, &lwork, &rwork[0], &info ); lwork = dummyWork.real; std::vector<scomplex> work(lwork); LAPACK(cgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, U, &ldu, VAdj, &ldva, &work[0], &lwork, &rwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("cgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void HessenbergEig( int n, float* H, int ldh, scomplex* w ) { #ifndef RELEASE PushCallStack("lapack::HessenbergEig"); #endif if( n == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char job='E', compz='N'; int ilo=1, ihi=n; int fakeLDim=1, lwork=-1, info; float dummyWork; std::vector<float> wr( n ), wi( n ); LAPACK(shseqr) ( &job, &compz, &n, &ilo, &ihi, H, &ldh, &wr[0], &wi[0], 0, &fakeLDim, &dummyWork, &lwork, &info ); lwork = dummyWork; std::vector<float> work(lwork); LAPACK(shseqr) ( &job, &compz, &n, &ilo, &ihi, H, &ldh, &wr[0], &wi[0], 0, &fakeLDim, &work[0], &lwork, &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("shseqr's failed to compute all eigenvalues"); } for( int i=0; i<n; ++i ) w[i] = elem::Complex<float>(wr[i],wi[i]); #ifndef RELEASE PopCallStack(); #endif }
/** DGETRF computes an LU factorization of a general M-by-N matrix A using partial pivoting with row interchanges. * * This routine is functionally equivalent to LAPACK's dgetrf. * For details on its interface, see * http://www.netlib.org/lapack/explore-html/d3/d6a/dgetrf_8f.html * */ void RELAPACK_dgetrf( const int *m, const int *n, double *A, const int *ldA, int *ipiv, int *info ) { // Check arguments *info = 0; if (*m < 0) *info = -1; else if (*n < 0) *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; if (*info) { const int minfo = -*info; LAPACK(xerbla)("DGETRF", &minfo); return; } const int sn = MIN(*m, *n); RELAPACK_dgetrf_rec(m, &sn, A, ldA, ipiv, info); // Right remainder if (*m < *n) { // Constants const double ONE[] = { 1. }; const int iONE[] = { 1. }; // Splitting const int rn = *n - *m; // A_L A_R const double *const A_L = A; double *const A_R = A + *ldA * *m; // A_R = apply(ipiv, A_R) LAPACK(dlaswp)(&rn, A_R, ldA, iONE, m, ipiv, iONE); // A_R = A_S \ A_R BLAS(dtrsm)("L", "L", "N", "U", m, &rn, ONE, A_L, ldA, A_R, ldA); } }
/** SPBTRF computes the Cholesky factorization of a real symmetric positive definite band matrix A. * * This routine is functionally equivalent to LAPACK's spbtrf. * For details on its interface, see * http://www.netlib.org/lapack/explore-html/d1/d22/spbtrf_8f.html * */ void RELAPACK_spbtrf( const char *uplo, const int *n, const int *kd, float *Ab, const int *ldAb, int *info ) { // Check arguments const int lower = LAPACK(lsame)(uplo, "L"); const int upper = LAPACK(lsame)(uplo, "U"); *info = 0; if (!lower && !upper) *info = -1; else if (*n < 0) *info = -2; else if (*kd < 0) *info = -3; else if (*ldAb < *kd + 1) *info = -5; if (*info) { const int minfo = -*info; LAPACK(xerbla)("SPBTRF", &minfo); return; } // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; // Constant const float ZERO[] = { 0. }; // Allocate work space const int n1 = SREC_SPLIT(*n); const int mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; const int nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; float *Work = malloc(mWork * nWork * sizeof(float)); LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); // Recursive kernel RELAPACK_spbtrf_rec(&cleanuplo, n, kd, Ab, ldAb, Work, &mWork, info); // Free work space free(Work); }
void QRSVD ( int m, int n, double* A, int lda, double* s, double* U, int ldu, double* VTrans, int ldvt ) { #ifndef RELEASE PushCallStack("lapack::QRSVD"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='S', jobvt='S'; int lwork=-1, info; double dummyWork; LAPACK(dgesvd) ( &jobu, &jobvt, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &dummyWork, &lwork, &info ); lwork = dummyWork; std::vector<double> work(lwork); LAPACK(dgesvd) ( &jobu, &jobvt, &m, &n, A, &lda, s, U, &ldu, VTrans, &ldvt, &work[0], &lwork, &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("dgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void SingularValues( int m, int n, dcomplex* A, int lda, double* s ) { #ifndef RELEASE PushCallStack("lapack::SingularValues"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='N', jobva='N'; int fakeLDim=1, lwork=-1, info; dcomplex dummyWork; const int k = std::min(m,n); std::vector<double> rwork(5*k); LAPACK(zgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &dummyWork, &lwork, &rwork[0], &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zgesvd) ( &jobu, &jobva, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &work[0], &lwork, &rwork[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void SingularValues( int m, int n, float* A, int lda, float* s ) { #ifndef RELEASE PushCallStack("lapack::SingularValues"); #endif if( m==0 || n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char jobu='N', jobvt='N'; int fakeLDim=1, lwork=-1, info; float dummyWork; LAPACK(sgesvd) ( &jobu, &jobvt, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &dummyWork, &lwork, &info ); lwork = dummyWork; std::vector<float> work(lwork); LAPACK(sgesvd) ( &jobu, &jobvt, &m, &n, A, &lda, s, 0, &fakeLDim, 0, &fakeLDim, &work[0], &lwork, &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("sgesvd's updating process failed"); } #ifndef RELEASE PopCallStack(); #endif }
void HessenbergEig( int n, dcomplex* H, int ldh, dcomplex* w ) { #ifndef RELEASE PushCallStack("lapack::HessenbergEig"); #endif if( n == 0 ) { #ifndef RELEASE PopCallStack(); #endif return; } const char job='E', compz='N'; int ilo=1, ihi=n; int fakeLDim=1, lwork=-1, info; dcomplex dummyWork; LAPACK(zhseqr) ( &job, &compz, &n, &ilo, &ihi, H, &ldh, w, 0, &fakeLDim, &dummyWork, &lwork, &info ); lwork = dummyWork.real; std::vector<dcomplex> work(lwork); LAPACK(zhseqr) ( &job, &compz, &n, &ilo, &ihi, H, &ldh, w, 0, &fakeLDim, &work[0], &lwork, &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { throw std::runtime_error("zhseqr's failed to compute all eigenvalues"); } #ifndef RELEASE PopCallStack(); #endif }
static void *eigval_subset_thread_r(void *argin) { /* from input argument */ int bl_size, rf_begin, rf_end; double *D, *DE2; double rtol1, rtol2, pivmin; double bl_spdiam; val_t *Wstruct; /* others */ int info, offset; double *W, *Werr, *Wgap; int *Windex; double *work; int *iwork; retrieve_auxarg2((auxarg2_t *) argin, &bl_size, &D, &DE2, &rf_begin, &rf_end, &Wstruct, &rtol1, &rtol2, &pivmin, &bl_spdiam); /* malloc work space */ work = (double *) malloc( 2*bl_size * sizeof(double) ); assert(work != NULL); iwork = (int *) malloc( 2*bl_size * sizeof(int) ); assert(iwork != NULL); W = Wstruct->W; Werr = Wstruct->Werr; Wgap = Wstruct->Wgap; Windex = Wstruct->Windex; /* special case of only one eigenvalue */ if (rf_begin == rf_end) Wgap[rf_begin] = 0.0; offset = Windex[rf_begin] - 1; /* call bisection routine to refine the eigenvalues */ LAPACK(dlarrb) (&bl_size, D, DE2, &Windex[rf_begin], &Windex[rf_end], &rtol1, &rtol2, &offset, &W[rf_begin], &Wgap[rf_begin], &Werr[rf_begin], work, iwork, &pivmin, &bl_spdiam, &bl_size, &info); assert(info == 0); /* clean up */ free(work); free(iwork); return(NULL); }
void BidiagQRAlg ( char uplo, int n, int numColsVAdj, int numRowsU, double* d, double* e, dcomplex* VAdj, int ldVAdj, dcomplex* U, int ldU ) { #ifndef RELEASE PushCallStack("lapack::BidiagQRAlg"); #endif if( n==0 ) { #ifndef RELEASE PopCallStack(); #endif return; } int info; dcomplex* C=0; const int numColsC=0, ldC=1; std::vector<double> work( 4*n ); LAPACK(zbdsqr) ( &uplo, &n, &numColsVAdj, &numRowsU, &numColsC, d, e, VAdj, &ldVAdj, U, &ldU, C, &ldC, &work[0], &info ); if( info < 0 ) { std::ostringstream msg; msg << "Argument " << -info << " had illegal value"; throw std::logic_error( msg.str().c_str() ); } else if( info > 0 ) { std::ostringstream msg; msg << "zbdsqr had " << info << " elements of e not converge"; throw std::runtime_error( msg.str().c_str() ); } #ifndef RELEASE PopCallStack(); #endif }
/** CSYTRF_ROOK computes the factorization of a complex symmetric matrix A using the bounded Bunch-Kaufman ("rook") diagonal pivoting method. * * This routine is functionally equivalent to LAPACK's csytrf_rook. * For details on its interface, see * http://www.netlib.org/lapack/explore-html/d8/dc8/csytrf__rook_8f.html * */ void RELAPACK_csytrf_rook( const char *uplo, const int *n, float *A, const int *ldA, int *ipiv, float *Work, const int *lWork, int *info ) { // Required work size const int cleanlWork = *n * (*n / 2); int minlWork = cleanlWork; #if XSYTRF_ALLOW_MALLOC minlWork = 1; #endif // Check arguments const int lower = LAPACK(lsame)(uplo, "L"); const int upper = LAPACK(lsame)(uplo, "U"); *info = 0; if (!lower && !upper) *info = -1; else if (*n < 0) *info = -2; else if (*ldA < MAX(1, *n)) *info = -4; else if (*lWork < minlWork && *lWork != -1) *info = -7; else if (*lWork == -1) { // Work size query *Work = cleanlWork; return; } // Ensure Work size float *cleanWork = Work; #if XSYTRF_ALLOW_MALLOC if (!*info && *lWork < cleanlWork) { cleanWork = malloc(cleanlWork * 2 * sizeof(float)); if (!cleanWork) *info = -7; } #endif if (*info) { const int minfo = -*info; LAPACK(xerbla)("CSYTRF", &minfo); return; } // Clean char * arguments const char cleanuplo = lower ? 'L' : 'U'; // Dummy argument int nout; // Recursive kernel RELAPACK_csytrf_rook_rec(&cleanuplo, n, n, &nout, A, ldA, ipiv, cleanWork, n, info); #if XSYTRF_ALLOW_MALLOC if (cleanWork != Work) free(cleanWork); #endif }
/** csytrf_rook's recursive compute kernel */ static void RELAPACK_csytrf_rook_rec( const char *uplo, const int *n_full, const int *n, int *n_out, float *A, const int *ldA, int *ipiv, float *Work, const int *ldWork, int *info ) { // top recursion level? const int top = *n_full == *n; if (*n <= MAX(CROSSOVER_CSYTRF_ROOK, 3)) { // Unblocked if (top) { LAPACK(csytf2)(uplo, n, A, ldA, ipiv, info); *n_out = *n; } else RELAPACK_csytrf_rook_rec2(uplo, n_full, n, n_out, A, ldA, ipiv, Work, ldWork, info); return; } int info1, info2; // Constants const float ONE[] = { 1., 0. }; const float MONE[] = { -1., 0. }; const int iONE[] = { 1 }; const int n_rest = *n_full - *n; if (*uplo == 'L') { // Splitting (setup) int n1 = CREC_SPLIT(*n); int n2 = *n - n1; // Work_L * float *const Work_L = Work; // recursion(A_L) int n1_out; RELAPACK_csytrf_rook_rec(uplo, n_full, &n1, &n1_out, A, ldA, ipiv, Work_L, ldWork, &info1); n1 = n1_out; // Splitting (continued) n2 = *n - n1; const int n_full2 = *n_full - n1; // * * // A_BL A_BR // A_BL_B A_BR_B float *const A_BL = A + 2 * n1; float *const A_BR = A + 2 * *ldA * n1 + 2 * n1; float *const A_BL_B = A + 2 * *n; float *const A_BR_B = A + 2 * *ldA * n1 + 2 * *n; // * * // Work_BL Work_BR // * * // (top recursion level: use Work as Work_BR) float *const Work_BL = Work + 2 * n1; float *const Work_BR = top ? Work : Work + 2 * *ldWork * n1 + 2 * n1; const int ldWork_BR = top ? n2 : *ldWork; // ipiv_T // ipiv_B int *const ipiv_B = ipiv + n1; // A_BR = A_BR - A_BL Work_BL' RELAPACK_cgemmt(uplo, "N", "T", &n2, &n1, MONE, A_BL, ldA, Work_BL, ldWork, ONE, A_BR, ldA); BLAS(cgemm)("N", "T", &n_rest, &n2, &n1, MONE, A_BL_B, ldA, Work_BL, ldWork, ONE, A_BR_B, ldA); // recursion(A_BR) int n2_out; RELAPACK_csytrf_rook_rec(uplo, &n_full2, &n2, &n2_out, A_BR, ldA, ipiv_B, Work_BR, &ldWork_BR, &info2); if (n2_out != n2) { // undo 1 column of updates const int n_restp1 = n_rest + 1; // last column of A_BR float *const A_BR_r = A_BR + 2 * *ldA * n2_out + 2 * n2_out; // last row of A_BL float *const A_BL_b = A_BL + 2 * n2_out; // last row of Work_BL float *const Work_BL_b = Work_BL + 2 * n2_out; // A_BR_r = A_BR_r + A_BL_b Work_BL_b' BLAS(cgemv)("N", &n_restp1, &n1, ONE, A_BL_b, ldA, Work_BL_b, ldWork, ONE, A_BR_r, iONE); } n2 = n2_out; // shift pivots int i; for (i = 0; i < n2; i++) if (ipiv_B[i] > 0) ipiv_B[i] += n1; else ipiv_B[i] -= n1; *info = info1 || info2; *n_out = n1 + n2; } else { // Splitting (setup) int n2 = CREC_SPLIT(*n); int n1 = *n - n2; // * Work_R // (top recursion level: use Work as Work_R) float *const Work_R = top ? Work : Work + 2 * *ldWork * n1; // recursion(A_R) int n2_out; RELAPACK_csytrf_rook_rec(uplo, n_full, &n2, &n2_out, A, ldA, ipiv, Work_R, ldWork, &info2); const int n2_diff = n2 - n2_out; n2 = n2_out; // Splitting (continued) n1 = *n - n2; const int n_full1 = *n_full - n2; // * A_TL_T A_TR_T // * A_TL A_TR // * * * float *const A_TL_T = A + 2 * *ldA * n_rest; float *const A_TR_T = A + 2 * *ldA * (n_rest + n1); float *const A_TL = A + 2 * *ldA * n_rest + 2 * n_rest; float *const A_TR = A + 2 * *ldA * (n_rest + n1) + 2 * n_rest; // Work_L * // * Work_TR // * * // (top recursion level: Work_R was Work) float *const Work_L = Work; float *const Work_TR = Work + 2 * *ldWork * (top ? n2_diff : n1) + 2 * n_rest; const int ldWork_L = top ? n1 : *ldWork; // A_TL = A_TL - A_TR Work_TR' RELAPACK_cgemmt(uplo, "N", "T", &n1, &n2, MONE, A_TR, ldA, Work_TR, ldWork, ONE, A_TL, ldA); BLAS(cgemm)("N", "T", &n_rest, &n1, &n2, MONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, ldA); // recursion(A_TL) int n1_out; RELAPACK_csytrf_rook_rec(uplo, &n_full1, &n1, &n1_out, A, ldA, ipiv, Work_L, &ldWork_L, &info1); if (n1_out != n1) { // undo 1 column of updates const int n_restp1 = n_rest + 1; // A_TL_T_l = A_TL_T_l + A_TR_T Work_TR_t' BLAS(cgemv)("N", &n_restp1, &n2, ONE, A_TR_T, ldA, Work_TR, ldWork, ONE, A_TL_T, iONE); } n1 = n1_out; *info = info2 || info1; *n_out = n1 + n2; } }
double SafeNorm( double alpha, double beta ) { return LAPACK(dlapy2)( &alpha, &beta ); }
float SafeNorm( float alpha, float beta, float gamma ) { return LAPACK(slapy3)( &alpha, &beta, &gamma ); }
double SafeNorm( double alpha, double beta, double gamma ) { return LAPACK(dlapy3)( &alpha, &beta, &gamma ); }
void ComputeGivens ( float phi, float gamma, float* c, float* s, float* rho ) { LAPACK(slartg)( &phi, &gamma, c, s, rho ); }
void ComputeGivens ( dcomplex phi, dcomplex gamma, double* c, dcomplex* s, dcomplex* rho ) { LAPACK(zlartg)( &phi, &gamma, c, s, rho ); }
void ComputeGivens ( double phi, double gamma, double* c, double* s, double* rho ) { LAPACK(dlartg)( &phi, &gamma, c, s, rho ); }
void ComputeGivens ( scomplex phi, scomplex gamma, float* c, scomplex* s, scomplex* rho ) { LAPACK(clartg)( &phi, &gamma, c, s, rho ); }