int buffer_add (void) { ret_t ret; uint32_t len; chula_buffer_t *buf = NULL; WORK(chula_buffer_new (&buf)); for (uint32_t n=0; n<20; n++) { len = buf->len; ret = chula_buffer_add_str (buf, "testing "); assert (((ret == ret_ok) && (buf->len > len)) || ((ret == ret_nomem) && (buf->len <= len))); } for (uint32_t n=0; n<20; n++) { len = buf->len; ret = chula_buffer_add_char (buf, 'a'); assert (((ret == ret_ok) && (buf->len > len)) || ((ret == ret_nomem) && (buf->len <= len))); } for (uint32_t n=0; n<20; n++) { len = buf->len; ret = chula_buffer_add_long10 (buf, 123456789); assert (((ret == ret_ok) && (buf->len > len)) || ((ret == ret_nomem) && (buf->len <= len))); } chula_buffer_free(buf); return 0; }
static void* simple_spinlock(void* ignore) { (void)ignore; for (uint64_t i = 0; i < ITERATIONS; ++i) { spinlock_lock(&simple_spin); WORK(); spinlock_unlock(&simple_spin); } return 0; }
static void* simple_ticketlock_yield(void* ignore) { (void)ignore; for (uint64_t i = 0; i < ITERATIONS; ++i) { ticketlock_yieldlock(&simple_ticket); WORK(); ticketlock_unlock(&simple_ticket); } return 0; }
int ccdl::ConstrainedLeastSquaresFit ( int const nobs, int const nparam, double const * A_obs_by_param, double * x_param, double const * b_obs, int const ncon, double const * D_con_by_param, double const * c_con ) { // min (xt.At-bt).(A.x-b) s.t. D.x = c std::vector<double> A( A_obs_by_param, A_obs_by_param + nparam*nobs ); std::vector<double> b( b_obs, b_obs + nobs ); std::vector<double> D( D_con_by_param, D_con_by_param + ncon*nparam ); std::vector<double> c( c_con, c_con + ncon ); int LWORK = -1; int INFO = 0; double twork = 0; dgglse_( &nobs, &nparam, &ncon, A.data(), &nobs, D.data(), &ncon, b.data(), c.data(), x_param, &twork, &LWORK, &INFO ); if ( INFO == 0 ) { LWORK = twork+1; std::vector<double> WORK( LWORK, 0. ); #ifdef PDBG std::printf("dgglse_\n"); #endif dgglse_( &nobs, &nparam, &ncon, A.data(), &nobs, D.data(), &ncon, b.data(), c.data(), x_param, WORK.data(), &LWORK, &INFO ); #ifdef PDBG std::printf("return %i\n",INFO); #endif if ( INFO > 0 ) throw ccdl::SingularMatrixException("ccdl::ConstrainedLeastSquaredFit"); } else { std::fill( x_param, x_param + nparam, 0. ); }; return INFO; }
// implement plain ql matrix ql(vector &D,vector &E) { int N=D.Sz; char COMPZ='I'; matrix Z(N,N); for (int i=0;i<N;i++) Z(i,i)=1.; int INFO=0; int LDZ=N; vector WORK(2*N-2); FORTRAN(dsteqr)(&COMPZ,&N,D.TheVector,E.TheVector,Z.TheMatrix,&LDZ, WORK.TheVector,&INFO); return Z; }
int ccdl::LeastSquaresSvdFit ( int const nobs, int const nparam, double const * A_obs_by_param, double * x_param, double const * b_obs, double TOL ) { // min (xt.At-bt).(A.x-b) std::vector<double> A( A_obs_by_param, A_obs_by_param + nparam*nobs ); int nmax = std::max( nparam, nobs ); std::vector<double> X( nmax, 0. ); std::copy( b_obs, b_obs + nobs, X.data() ); std::vector<double> S( nmax, 0. ); int LWORK = -1; std::vector<int> iwork(1,0); int INFO = 0; double twork = 0; int rank = 0; int nrhs=1; dgelsd_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, S.data(), &TOL, &rank, &twork, &LWORK, iwork.data(), &INFO ); if ( INFO == 0 ) { LWORK = twork+1; std::vector<double> WORK( LWORK, 0. ); int LIWORK = iwork[0]; iwork.resize( LIWORK ); #ifdef PDBG std::printf("dgelsd_\n"); #endif dgelsd_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, S.data(), &TOL, &rank, WORK.data(), &LWORK, iwork.data(), &INFO ); #ifdef PDBG std::printf("return %i\n",INFO); #endif std::copy( X.data(), X.data() + nparam, x_param ); } else { std::fill( x_param, x_param + nparam, 0. ); }; return INFO; }
int buffer_operations (void) { ret_t ret; uint32_t len; chula_buffer_t *buf = NULL; WORK(chula_buffer_new (&buf)); for (uint32_t n=2; n<12; n++) { WORK(chula_buffer_clean (buf)); WORK(chula_buffer_add_str (buf, "abcdefghijklmopqrstuvwxyz")); len = buf->len; ret = chula_buffer_multiply (buf, n); assert (((ret == ret_ok) && (buf->len > len)) || ((ret == ret_nomem) && (buf->len <= len))); } chula_buffer_free(buf); return 0; }
int buffer_dup (void) { ret_t ret; chula_buffer_t *buf = NULL; WORK(chula_buffer_new (&buf)); WORK(chula_buffer_add_str (buf, "testing ")); for (uint32_t n=0; n<5; n++) { chula_buffer_t *dup = NULL; ret = chula_buffer_dup (buf, &dup); assert (((ret == ret_nomem) && (dup == NULL)) || ((ret == ret_ok) && (dup != NULL) && (dup->buf != NULL))); chula_buffer_free (dup); } chula_buffer_free(buf); return 0; }
vector hermdl(cmatrix& a) { char UPLO='U'; char JOBZ='V'; int INFO=0; int N=a.Rows; int LDA=N; int LWORK=2*N; cvector WORK(LWORK); vector W(N); vector RWORK(3*N); FORTRAN(cheev)(&JOBZ,&UPLO,&N,a.TheMatrix,&LDA,W.TheVector,WORK.TheVector, &LWORK,RWORK.TheVector,&INFO); if (INFO != 0) cerr<<"diagonalization failed"<<endl; return W; }
bool fullMatrix<double>::svd(fullMatrix<double> &V, fullVector<double> &S) { fullMatrix<double> VT(V.size2(), V.size1()); int M = size1(), N = size2(), LDA = size1(), LDVT = VT.size1(), info; int lwork = std::max(3 * std::min(M, N) + std::max(M, N), 5 * std::min(M, N)); fullVector<double> WORK(lwork); F77NAME(dgesvd)("O", "A", &M, &N, _data, &LDA, S._data, _data, &LDA, VT._data, &LDVT, WORK._data, &lwork, &info); V = VT.transpose(); if(info == 0) return true; if(info > 0) Msg::Error("SVD did not converge"); else Msg::Error("Wrong %d-th argument in SVD decomposition", -info); return false; }
vector diag(matrix& a) // returns eigenvalues in a vector and transforms the matrix argument // for symmetric matrices { int N=a.Rows; char UPLO='U'; char JOBZ='V'; int INFO=0; int LDA=N; int LWORK=3*N; vector WORK(LWORK); vector W(N); FORTRAN(dsyev)(&JOBZ,&UPLO,&N,a.TheMatrix,&LDA,W.TheVector,WORK.TheVector, &LWORK,&INFO); if (INFO != 0) cerr<<"diagonalization failed"<<endl; return W; }
int ccdl::LeastSquaresFit ( int const nobs, int const nparam, double const * A_obs_by_param, double * x_param, double const * b_obs, double relative_accuracy_of_the_obs ) { // min (xt.At-bt).(A.x-b) std::vector<double> A( A_obs_by_param, A_obs_by_param + nparam*nobs ); int nmax = std::max( nparam, nobs ); std::vector<double> X( nmax, 0. ); std::copy( b_obs, b_obs + nobs, X.data() ); std::vector<int> jpvt( nparam, 0 ); int LWORK = -1; int INFO = 0; double twork = 0; int rank = 0; int nrhs=1; dgelsy_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, jpvt.data(), &relative_accuracy_of_the_obs, &rank, &twork, &LWORK, &INFO ); if ( INFO == 0 ) { LWORK = twork+1; std::vector<double> WORK( LWORK, 0. ); #ifdef PDBG std::printf("dgelsy_\n"); #endif dgelsy_( &nobs, &nparam, &nrhs, A.data(), &nobs, X.data(), &nmax, jpvt.data(), &relative_accuracy_of_the_obs, &rank, WORK.data(), &LWORK, &INFO ); #ifdef PDBG std::printf("return %i\n",INFO); #endif std::copy( X.data(), X.data() + nparam, x_param ); } else { std::fill( x_param, x_param + nparam, 0. ); }; return INFO; }
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) { Ppattern_ = Ppattern; const RCP<const Map> uniqueMap = Ppattern_->getDomainMap(); const RCP<const Map> nonUniqueMap = Ppattern_->getColMap(); RCP<const Import> importer = ImportFactory::Build(uniqueMap, nonUniqueMap); const size_t NSDim = Bc.getNumVectors(); X_ = MultiVectorFactory::Build(nonUniqueMap, NSDim); X_->doImport(Bc, *importer, Xpetra::INSERT); size_t numRows = Ppattern_->getNodeNumRows(); XXtInv_.resize(numRows); Teuchos::SerialDenseVector<LO,SC> BcRow(NSDim, false); for (size_t i = 0; i < numRows; i++) { Teuchos::ArrayView<const LO> indices; Ppattern_->getLocalRowView(i, indices); size_t nnz = indices.size(); Teuchos::SerialDenseMatrix<LO,SC> locX(NSDim, nnz, false); for (size_t j = 0; j < nnz; j++) { for (size_t k = 0; k < NSDim; k++) BcRow[k] = X_->getData(k)[indices[j]]; Teuchos::setCol(BcRow, (LO)j, locX); } XXtInv_[i] = Teuchos::SerialDenseMatrix<LO,SC>(NSDim, NSDim, false); Teuchos::BLAS<LO,SC> blas; blas.GEMM(Teuchos::NO_TRANS, Teuchos::CONJ_TRANS, NSDim, NSDim, nnz, Teuchos::ScalarTraits<SC>::one(), locX.values(), locX.stride(), locX.values(), locX.stride(), Teuchos::ScalarTraits<SC>::zero(), XXtInv_[i].values(), XXtInv_[i].stride()); Teuchos::LAPACK<LO,SC> lapack; LO info, lwork = 3*NSDim; ArrayRCP<LO> IPIV(NSDim); ArrayRCP<SC> WORK(lwork); lapack.GETRF(NSDim, NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), &info); lapack.GETRI(NSDim, XXtInv_[i].values(), XXtInv_[i].stride(), IPIV.get(), WORK.get(), lwork, &info); } }
cvector diag(cmatrix& a,cmatrix &VL,cmatrix &VR) // returns eigenvalues in a vector and transforms the matrix argument { int N=a.Rows; char JOBVL='V'; char JOBVR='V'; int INFO=0; int LDA=N; int LDVL=N; int LDVR=N; int LWORK=2*N; cvector WORK(LWORK); cvector W(N); vector RWORK(2*N); FORTRAN(zgeev)(&JOBVL,&JOBVR,&N,a.TheMatrix,&LDA,W.TheVector,VL.TheMatrix,&LDVL, VR.TheMatrix,&LDVR,WORK.TheVector,&LWORK, RWORK.TheVector,&INFO); if (INFO != 0) cerr<<"diagonalization failed"<<endl; return W; }
// construct identity matrix vector diaggen(matrix& a) { int N=a.Rows; char JOBVL='V'; char JOBVR='V'; int INFO=0; int LDA=N; vector WR(N); vector WI(N); int LDVL=N; int LDVR=N; matrix VL(LDVL,N); matrix VR(LDVR,N); int LWORK=4*N; vector WORK(LWORK); vector W(N); FORTRAN(dgeev)(&JOBVL,&JOBVR,&N,a.TheMatrix,&LDA,WR.TheVector, WI.TheVector,VL.TheMatrix,&LDVL,VR.TheMatrix,&LDVR,WORK.TheVector, &LWORK,&INFO); if (INFO != 0) cerr<<"diagonalization failed"<<endl; return WR; }
int buffer_encoders (void) { ret_t ret; chula_buffer_t *buf = NULL; chula_buffer_t encoded = CHULA_BUF_INIT; #define ASSERT_ENCODED \ assert (((ret == ret_ok) && (encoded.len > 0) && (encoded.buf != NULL)) || \ ((ret == ret_nomem) && (encoded.len == 0) && (encoded.buf == NULL))) /* Base64 */ WORK(chula_buffer_new (&buf)); WORK(chula_buffer_add_str (buf, "abcdefghijklmopqrstuvwxyz")); ret = chula_buffer_encode_base64 (buf, &encoded); ASSERT_ENCODED; /* MD5 */ WORK(chula_buffer_mrproper(&encoded)); ret = chula_buffer_encode_md5 (buf, &encoded); ASSERT_ENCODED; /* SHA1 */ WORK(chula_buffer_mrproper(&encoded)); ret = chula_buffer_encode_sha1 (buf, &encoded); ASSERT_ENCODED; WORK(chula_buffer_mrproper(&encoded)); ret = chula_buffer_encode_sha1_base64 (buf, &encoded); ASSERT_ENCODED; /* SHA512 */ WORK(chula_buffer_mrproper(&encoded)); ret = chula_buffer_encode_sha512 (buf, &encoded); ASSERT_ENCODED; chula_buffer_mrproper (&encoded); chula_buffer_free(buf); return 0; }
/* Subroutine */ int dormbr_(char *vect, char *side, char *trans, integer *m, integer *n, integer *k, doublereal *a, integer *lda, doublereal *tau, doublereal *c, integer *ldc, doublereal *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= If VECT = 'Q', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': Q * C C * Q TRANS = 'T': Q**T * C C * Q**T If VECT = 'P', DORMBR overwrites the general real M-by-N matrix C with SIDE = 'L' SIDE = 'R' TRANS = 'N': P * C C * P TRANS = 'T': P**T * C C * P**T Here Q and P**T are the orthogonal matrices determined by DGEBRD when reducing a real matrix A to bidiagonal form: A = Q * B * P**T. Q and P**T are defined as products of elementary reflectors H(i) and G(i) respectively. Let nq = m if SIDE = 'L' and nq = n if SIDE = 'R'. Thus nq is the order of the orthogonal matrix Q or P**T that is applied. If VECT = 'Q', A is assumed to have been an NQ-by-K matrix: if nq >= k, Q = H(1) H(2) . . . H(k); if nq < k, Q = H(1) H(2) . . . H(nq-1). If VECT = 'P', A is assumed to have been a K-by-NQ matrix: if k < nq, P = G(1) G(2) . . . G(k); if k >= nq, P = G(1) G(2) . . . G(nq-1). Arguments ========= VECT (input) CHARACTER*1 = 'Q': apply Q or Q**T; = 'P': apply P or P**T. SIDE (input) CHARACTER*1 = 'L': apply Q, Q**T, P or P**T from the Left; = 'R': apply Q, Q**T, P or P**T from the Right. TRANS (input) CHARACTER*1 = 'N': No transpose, apply Q or P; = 'T': Transpose, apply Q**T or P**T. M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER If VECT = 'Q', the number of columns in the original matrix reduced by DGEBRD. If VECT = 'P', the number of rows in the original matrix reduced by DGEBRD. K >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,min(nq,K)) if VECT = 'Q' (LDA,nq) if VECT = 'P' The vectors which define the elementary reflectors H(i) and G(i), whose products determine the matrices Q and P, as returned by DGEBRD. LDA (input) INTEGER The leading dimension of the array A. If VECT = 'Q', LDA >= max(1,nq); if VECT = 'P', LDA >= max(1,min(nq,K)). TAU (input) DOUBLE PRECISION array, dimension (min(nq,K)) TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i) which determines Q or P, as returned by DGEBRD in the array argument TAUQ or TAUP. C (input/output) DOUBLE PRECISION array, dimension (LDC,N) On entry, the M-by-N matrix C. On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q or P*C or P**T*C or C*P or C*P**T. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. If SIDE = 'L', LWORK >= max(1,N); if SIDE = 'R', LWORK >= max(1,M). For optimum performance LWORK >= N*NB if SIDE = 'L', and LWORK >= M*NB if SIDE = 'R', where NB is the optimal blocksize. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static logical left; extern logical lsame_(char *, char *); static integer iinfo, i1, i2, mi, ni, nq, nw; extern /* Subroutine */ int xerbla_(char *, integer *), dormlq_( char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static logical notran; extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static logical applyq; static char transt[1]; #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)] *info = 0; applyq = lsame_(vect, "Q"); left = lsame_(side, "L"); notran = lsame_(trans, "N"); /* NQ is the order of Q or P and NW is the minimum dimension of WORK */ if (left) { nq = *m; nw = *n; } else { nq = *n; nw = *m; } if (! applyq && ! lsame_(vect, "P")) { *info = -1; } else if (! left && ! lsame_(side, "R")) { *info = -2; } else if (! notran && ! lsame_(trans, "T")) { *info = -3; } else if (*m < 0) { *info = -4; } else if (*n < 0) { *info = -5; } else if (*k < 0) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(nq,*k); if (applyq && *lda < max(1,nq) || ! applyq && *lda < max(i__1,i__2)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -11; } else if (*lwork < max(1,nw)) { *info = -13; } } if (*info != 0) { i__1 = -(*info); xerbla_("DORMBR", &i__1); return 0; } /* Quick return if possible */ WORK(1) = 1.; if (*m == 0 || *n == 0) { return 0; } if (applyq) { /* Apply Q */ if (nq >= *k) { /* Q was determined by a call to DGEBRD with nq >= k */ dormqr_(side, trans, m, n, k, &A(1,1), lda, &TAU(1), &C(1,1), ldc, &WORK(1), lwork, &iinfo); } else if (nq > 1) { /* Q was determined by a call to DGEBRD with nq < k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; dormqr_(side, trans, &mi, &ni, &i__1, &A(2,1), lda, &TAU(1) , &C(i1,i2), ldc, &WORK(1), lwork, &iinfo); } } else { /* Apply P */ if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } if (nq > *k) { /* P was determined by a call to DGEBRD with nq > k */ dormlq_(side, transt, m, n, k, &A(1,1), lda, &TAU(1), &C(1,1), ldc, &WORK(1), lwork, &iinfo); } else if (nq > 1) { /* P was determined by a call to DGEBRD with nq <= k */ if (left) { mi = *m - 1; ni = *n; i1 = 2; i2 = 1; } else { mi = *m; ni = *n - 1; i1 = 1; i2 = 2; } i__1 = nq - 1; dormlq_(side, transt, &mi, &ni, &i__1, &A(1,2), lda, &TAU(1), &C(i1,i2), ldc, &WORK(1), lwork, & iinfo); } } return 0; /* End of DORMBR */ } /* dormbr_ */
/* Subroutine */ int zungbr_(char *vect, integer *m, integer *n, integer *k, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZUNGBR generates one of the complex unitary matrices Q or P**H determined by ZGEBRD when reducing a complex matrix A to bidiagonal form: A = Q * B * P**H. Q and P**H are defined as products of elementary reflectors H(i) or G(i) respectively. If VECT = 'Q', A is assumed to have been an M-by-K matrix, and Q is of order M: if m >= k, Q = H(1) H(2) . . . H(k) and ZUNGBR returns the first n columns of Q, where m >= n >= k; if m < k, Q = H(1) H(2) . . . H(m-1) and ZUNGBR returns Q as an M-by-M matrix. If VECT = 'P', A is assumed to have been a K-by-N matrix, and P**H is of order N: if k < n, P**H = G(k) . . . G(2) G(1) and ZUNGBR returns the first m rows of P**H, where n >= m >= k; if k >= n, P**H = G(n-1) . . . G(2) G(1) and ZUNGBR returns P**H as an N-by-N matrix. Arguments ========= VECT (input) CHARACTER*1 Specifies whether the matrix Q or the matrix P**H is required, as defined in the transformation applied by ZGEBRD: = 'Q': generate Q; = 'P': generate P**H. M (input) INTEGER The number of rows of the matrix Q or P**H to be returned. M >= 0. N (input) INTEGER The number of columns of the matrix Q or P**H to be returned. N >= 0. If VECT = 'Q', M >= N >= min(M,K); if VECT = 'P', N >= M >= min(N,K). K (input) INTEGER If VECT = 'Q', the number of columns in the original M-by-K matrix reduced by ZGEBRD. If VECT = 'P', the number of rows in the original K-by-N matrix reduced by ZGEBRD. K >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by ZGEBRD. On exit, the M-by-N matrix Q or P**H. LDA (input) INTEGER The leading dimension of the array A. LDA >= M. TAU (input) COMPLEX*16 array, dimension (min(M,K)) if VECT = 'Q' (min(N,K)) if VECT = 'P' TAU(i) must contain the scalar factor of the elementary reflector H(i) or G(i), which determines Q or P**H, as returned by ZGEBRD in its array argument TAUQ or TAUP. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,min(M,N)). For optimum performance LWORK >= min(M,N)*NB, where NB is the optimal blocksize. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; /* Local variables */ static integer i, j; extern logical lsame_(char *, char *); static integer iinfo; static logical wantq; extern /* Subroutine */ int xerbla_(char *, integer *), zunglq_( integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zungqr_( integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *); #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; wantq = lsame_(vect, "Q"); if (! wantq && ! lsame_(vect, "P")) { *info = -1; } else if (*m < 0) { *info = -2; } else if (*n < 0 || wantq && (*n > *m || *n < min(*m,*k)) || ! wantq && ( *m > *n || *m < min(*n,*k))) { *info = -3; } else if (*k < 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = min(*m,*n); if (*lwork < max(i__1,i__2)) { *info = -9; } } if (*info != 0) { i__1 = -(*info); xerbla_("ZUNGBR", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0) { WORK(1).r = 1., WORK(1).i = 0.; return 0; } if (wantq) { /* Form Q, determined by a call to ZGEBRD to reduce an m-by-k matrix */ if (*m >= *k) { /* If m >= k, assume m >= n >= k */ zungqr_(m, n, k, &A(1,1), lda, &TAU(1), &WORK(1), lwork, & iinfo); } else { /* If m < k, assume m = n Shift the vectors which define the elementary reflect ors one column to the right, and set the first row and column of Q to those of the unit matrix */ for (j = *m; j >= 2; --j) { i__1 = j * a_dim1 + 1; A(1,j).r = 0., A(1,j).i = 0.; i__1 = *m; for (i = j + 1; i <= *m; ++i) { i__2 = i + j * a_dim1; i__3 = i + (j - 1) * a_dim1; A(i,j).r = A(i,j-1).r, A(i,j).i = A(i,j-1).i; /* L10: */ } /* L20: */ } i__1 = a_dim1 + 1; A(1,1).r = 1., A(1,1).i = 0.; i__1 = *m; for (i = 2; i <= *m; ++i) { i__2 = i + a_dim1; A(i,1).r = 0., A(i,1).i = 0.; /* L30: */ } if (*m > 1) { /* Form Q(2:m,2:m) */ i__1 = *m - 1; i__2 = *m - 1; i__3 = *m - 1; zungqr_(&i__1, &i__2, &i__3, &A(2,2), lda, &TAU( 1), &WORK(1), lwork, &iinfo); } } } else { /* Form P', determined by a call to ZGEBRD to reduce a k-by-n matrix */ if (*k < *n) { /* If k < n, assume k <= m <= n */ zunglq_(m, n, k, &A(1,1), lda, &TAU(1), &WORK(1), lwork, & iinfo); } else { /* If k >= n, assume m = n Shift the vectors which define the elementary reflect ors one row downward, and set the first row and column of P' to those of the unit matrix */ i__1 = a_dim1 + 1; A(1,1).r = 1., A(1,1).i = 0.; i__1 = *n; for (i = 2; i <= *n; ++i) { i__2 = i + a_dim1; A(i,1).r = 0., A(i,1).i = 0.; /* L40: */ } i__1 = *n; for (j = 2; j <= *n; ++j) { for (i = j - 1; i >= 2; --i) { i__2 = i + j * a_dim1; i__3 = i - 1 + j * a_dim1; A(i,j).r = A(i-1,j).r, A(i,j).i = A(i-1,j).i; /* L50: */ } i__2 = j * a_dim1 + 1; A(1,j).r = 0., A(1,j).i = 0.; /* L60: */ } if (*n > 1) { /* Form P'(2:n,2:n) */ i__1 = *n - 1; i__2 = *n - 1; i__3 = *n - 1; zunglq_(&i__1, &i__2, &i__3, &A(2,2), lda, &TAU( 1), &WORK(1), lwork, &iinfo); } } } return 0; /* End of ZUNGBR */ } /* zungbr_ */
/* Subroutine */ int sstein_(integer *n, real *d, real *e, integer *m, real * w, integer *iblock, integer *isplit, real *z, integer *ldz, real * work, integer *iwork, integer *ifail, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SSTEIN computes the eigenvectors of a real symmetric tridiagonal matrix T corresponding to specified eigenvalues, using inverse iteration. The maximum number of iterations allowed for each eigenvector is specified by an internal parameter MAXITS (currently set to 5). Arguments ========= N (input) INTEGER The order of the matrix. N >= 0. D (input) REAL array, dimension (N) The n diagonal elements of the tridiagonal matrix T. E (input) REAL array, dimension (N) The (n-1) subdiagonal elements of the tridiagonal matrix T, in elements 1 to N-1. E(N) need not be set. M (input) INTEGER The number of eigenvectors to be found. 0 <= M <= N. W (input) REAL array, dimension (N) The first M elements of W contain the eigenvalues for which eigenvectors are to be computed. The eigenvalues should be grouped by split-off block and ordered from smallest to largest within the block. ( The output array W from SSTEBZ with ORDER = 'B' is expected here. ) IBLOCK (input) INTEGER array, dimension (N) The submatrix indices associated with the corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to the first submatrix from the top, =2 if W(i) belongs to the second submatrix, etc. ( The output array IBLOCK from SSTEBZ is expected here. ) ISPLIT (input) INTEGER array, dimension (N) The splitting points, at which T breaks up into submatrices. The first submatrix consists of rows/columns 1 to ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 through ISPLIT( 2 ), etc. ( The output array ISPLIT from SSTEBZ is expected here. ) Z (output) REAL array, dimension (LDZ, M) The computed eigenvectors. The eigenvector associated with the eigenvalue W(i) is stored in the i-th column of Z. Any vector which fails to converge is set to its current iterate after MAXITS iterations. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). WORK (workspace) REAL array, dimension (5*N) IWORK (workspace) INTEGER array, dimension (N) IFAIL (output) INTEGER array, dimension (M) On normal exit, all elements of IFAIL are zero. If one or more eigenvectors fail to converge after MAXITS iterations, then their indices are stored in array IFAIL. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, then i eigenvectors failed to converge in MAXITS iterations. Their indices are stored in array IFAIL. Internal Parameters =================== MAXITS INTEGER, default = 5 The maximum number of iterations performed. EXTRA INTEGER, default = 2 The number of iterations performed after norm growth criterion is satisfied, should be at least 1. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__2 = 2; static integer c__1 = 1; static integer c_n1 = -1; /* System generated locals */ integer z_dim1, z_offset, i__1, i__2, i__3; real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static integer jblk, nblk, jmax; extern doublereal sdot_(integer *, real *, integer *, real *, integer *), snrm2_(integer *, real *, integer *); static integer i, j, iseed[4], gpind, iinfo; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *); static integer b1; extern doublereal sasum_(integer *, real *, integer *); static integer j1; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); static real ortol; extern /* Subroutine */ int saxpy_(integer *, real *, real *, integer *, real *, integer *); static integer indrv1, indrv2, indrv3, indrv4, indrv5, bn; static real xj; extern doublereal slamch_(char *); extern /* Subroutine */ int xerbla_(char *, integer *), slagtf_( integer *, real *, real *, real *, real *, real *, real *, integer *, integer *); static integer nrmchk; extern integer isamax_(integer *, real *, integer *); extern /* Subroutine */ int slagts_(integer *, integer *, real *, real *, real *, real *, integer *, real *, real *, integer *); static integer blksiz; static real onenrm, pertol; extern /* Subroutine */ int slarnv_(integer *, integer *, integer *, real *); static real stpcrt, scl, eps, ctr, sep, nrm, tol; static integer its; static real xjm, eps1; #define ISEED(I) iseed[(I)] #define D(I) d[(I)-1] #define E(I) e[(I)-1] #define W(I) w[(I)-1] #define IBLOCK(I) iblock[(I)-1] #define ISPLIT(I) isplit[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define IFAIL(I) ifail[(I)-1] #define Z(I,J) z[(I)-1 + ((J)-1)* ( *ldz)] *info = 0; i__1 = *m; for (i = 1; i <= *m; ++i) { IFAIL(i) = 0; /* L10: */ } if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -4; } else if (*ldz < max(1,*n)) { *info = -9; } else { i__1 = *m; for (j = 2; j <= *m; ++j) { if (IBLOCK(j) < IBLOCK(j - 1)) { *info = -6; goto L30; } if (IBLOCK(j) == IBLOCK(j - 1) && W(j) < W(j - 1)) { *info = -5; goto L30; } /* L20: */ } L30: ; } if (*info != 0) { i__1 = -(*info); xerbla_("SSTEIN", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *m == 0) { return 0; } else if (*n == 1) { Z(1,1) = 1.f; return 0; } /* Get machine constants. */ eps = slamch_("Precision"); /* Initialize seed for random number generator SLARNV. */ for (i = 1; i <= 4; ++i) { ISEED(i - 1) = 1; /* L40: */ } /* Initialize pointers. */ indrv1 = 0; indrv2 = indrv1 + *n; indrv3 = indrv2 + *n; indrv4 = indrv3 + *n; indrv5 = indrv4 + *n; /* Compute eigenvectors of matrix blocks. */ j1 = 1; i__1 = IBLOCK(*m); for (nblk = 1; nblk <= IBLOCK(*m); ++nblk) { /* Find starting and ending indices of block nblk. */ if (nblk == 1) { b1 = 1; } else { b1 = ISPLIT(nblk - 1) + 1; } bn = ISPLIT(nblk); blksiz = bn - b1 + 1; if (blksiz == 1) { goto L60; } gpind = b1; /* Compute reorthogonalization criterion and stopping criterion . */ onenrm = (r__1 = D(b1), dabs(r__1)) + (r__2 = E(b1), dabs(r__2)); /* Computing MAX */ r__3 = onenrm, r__4 = (r__1 = D(bn), dabs(r__1)) + (r__2 = E(bn - 1), dabs(r__2)); onenrm = dmax(r__3,r__4); i__2 = bn - 1; for (i = b1 + 1; i <= bn-1; ++i) { /* Computing MAX */ r__4 = onenrm, r__5 = (r__1 = D(i), dabs(r__1)) + (r__2 = E(i - 1) , dabs(r__2)) + (r__3 = E(i), dabs(r__3)); onenrm = dmax(r__4,r__5); /* L50: */ } ortol = onenrm * .001f; stpcrt = sqrt(.1f / blksiz); /* Loop through eigenvalues of block nblk. */ L60: jblk = 0; i__2 = *m; for (j = j1; j <= *m; ++j) { if (IBLOCK(j) != nblk) { j1 = j; goto L160; } ++jblk; xj = W(j); /* Skip all the work if the block size is one. */ if (blksiz == 1) { WORK(indrv1 + 1) = 1.f; goto L120; } /* If eigenvalues j and j-1 are too close, add a relativ ely small perturbation. */ if (jblk > 1) { eps1 = (r__1 = eps * xj, dabs(r__1)); pertol = eps1 * 10.f; sep = xj - xjm; if (sep < pertol) { xj = xjm + pertol; } } its = 0; nrmchk = 0; /* Get random starting vector. */ slarnv_(&c__2, iseed, &blksiz, &WORK(indrv1 + 1)); /* Copy the matrix T so it won't be destroyed in factori zation. */ scopy_(&blksiz, &D(b1), &c__1, &WORK(indrv4 + 1), &c__1); i__3 = blksiz - 1; scopy_(&i__3, &E(b1), &c__1, &WORK(indrv2 + 2), &c__1); i__3 = blksiz - 1; scopy_(&i__3, &E(b1), &c__1, &WORK(indrv3 + 1), &c__1); /* Compute LU factors with partial pivoting ( PT = LU ) */ tol = 0.f; slagtf_(&blksiz, &WORK(indrv4 + 1), &xj, &WORK(indrv2 + 2), &WORK( indrv3 + 1), &tol, &WORK(indrv5 + 1), &IWORK(1), &iinfo); /* Update iteration count. */ L70: ++its; if (its > 5) { goto L100; } /* Normalize and scale the righthand side vector Pb. Computing MAX */ r__2 = eps, r__3 = (r__1 = WORK(indrv4 + blksiz), dabs(r__1)); scl = blksiz * onenrm * dmax(r__2,r__3) / sasum_(&blksiz, &WORK( indrv1 + 1), &c__1); sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1); /* Solve the system LU = Pb. */ slagts_(&c_n1, &blksiz, &WORK(indrv4 + 1), &WORK(indrv2 + 2), & WORK(indrv3 + 1), &WORK(indrv5 + 1), &IWORK(1), &WORK( indrv1 + 1), &tol, &iinfo); /* Reorthogonalize by modified Gram-Schmidt if eigenvalu es are close enough. */ if (jblk == 1) { goto L90; } if ((r__1 = xj - xjm, dabs(r__1)) > ortol) { gpind = j; } if (gpind != j) { i__3 = j - 1; for (i = gpind; i <= j-1; ++i) { ctr = -(doublereal)sdot_(&blksiz, &WORK(indrv1 + 1), & c__1, &Z(b1,i), &c__1); saxpy_(&blksiz, &ctr, &Z(b1,i), &c__1, &WORK( indrv1 + 1), &c__1); /* L80: */ } } /* Check the infinity norm of the iterate. */ L90: jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1); nrm = (r__1 = WORK(indrv1 + jmax), dabs(r__1)); /* Continue for additional iterations after norm reaches stopping criterion. */ if (nrm < stpcrt) { goto L70; } ++nrmchk; if (nrmchk < 3) { goto L70; } goto L110; /* If stopping criterion was not satisfied, update info and store eigenvector number in array ifail. */ L100: ++(*info); IFAIL(*info) = j; /* Accept iterate as jth eigenvector. */ L110: scl = 1.f / snrm2_(&blksiz, &WORK(indrv1 + 1), &c__1); jmax = isamax_(&blksiz, &WORK(indrv1 + 1), &c__1); if (WORK(indrv1 + jmax) < 0.f) { scl = -(doublereal)scl; } sscal_(&blksiz, &scl, &WORK(indrv1 + 1), &c__1); L120: i__3 = *n; for (i = 1; i <= *n; ++i) { Z(i,j) = 0.f; /* L130: */ } i__3 = blksiz; for (i = 1; i <= blksiz; ++i) { Z(b1+i-1,j) = WORK(indrv1 + i); /* L140: */ } /* Save the shift to check eigenvalue spacing at next iteration. */ xjm = xj; /* L150: */ } L160: ; } return 0; /* End of SSTEIN */ } /* sstein_ */
/* Subroutine */ int zunghr_(integer *n, integer *ilo, integer *ihi, doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex * work, integer *lwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZUNGHR generates a complex unitary matrix Q which is defined as the product of IHI-ILO elementary reflectors of order N, as returned by ZGEHRD: Q = H(ilo) H(ilo+1) . . . H(ihi-1). Arguments ========= N (input) INTEGER The order of the matrix Q. N >= 0. ILO (input) INTEGER IHI (input) INTEGER ILO and IHI must have the same values as in the previous call of ZGEHRD. Q is equal to the unit matrix except in the submatrix Q(ilo+1:ihi,ilo+1:ihi). 1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the vectors which define the elementary reflectors, as returned by ZGEHRD. On exit, the N-by-N unitary matrix Q. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). TAU (input) COMPLEX*16 array, dimension (N-1) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by ZGEHRD. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= IHI-ILO. For optimum performance LWORK >= (IHI-ILO)*NB, where NB is the optimal blocksize. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4; /* Local variables */ static integer i, j, iinfo, nh; extern /* Subroutine */ int xerbla_(char *, integer *), zungqr_( integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *); #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; if (*n < 0) { *info = -1; } else if (*ilo < 1 || *ilo > max(1,*n)) { *info = -2; } else if (*ihi < min(*ilo,*n) || *ihi > *n) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *ihi - *ilo; if (*lwork < max(i__1,i__2)) { *info = -8; } } if (*info != 0) { i__1 = -(*info); xerbla_("ZUNGHR", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { WORK(1).r = 1., WORK(1).i = 0.; return 0; } /* Shift the vectors which define the elementary reflectors one column to the right, and set the first ilo and the last n-ihi rows and columns to those of the unit matrix */ i__1 = *ilo + 1; for (j = *ihi; j >= *ilo+1; --j) { i__2 = j - 1; for (i = 1; i <= j-1; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0., A(i,j).i = 0.; /* L10: */ } i__2 = *ihi; for (i = j + 1; i <= *ihi; ++i) { i__3 = i + j * a_dim1; i__4 = i + (j - 1) * a_dim1; A(i,j).r = A(i,j-1).r, A(i,j).i = A(i,j-1).i; /* L20: */ } i__2 = *n; for (i = *ihi + 1; i <= *n; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0., A(i,j).i = 0.; /* L30: */ } /* L40: */ } i__1 = *ilo; for (j = 1; j <= *ilo; ++j) { i__2 = *n; for (i = 1; i <= *n; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0., A(i,j).i = 0.; /* L50: */ } i__2 = j + j * a_dim1; A(j,j).r = 1., A(j,j).i = 0.; /* L60: */ } i__1 = *n; for (j = *ihi + 1; j <= *n; ++j) { i__2 = *n; for (i = 1; i <= *n; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0., A(i,j).i = 0.; /* L70: */ } i__2 = j + j * a_dim1; A(j,j).r = 1., A(j,j).i = 0.; /* L80: */ } nh = *ihi - *ilo; if (nh > 0) { /* Generate Q(ilo+1:ihi,ilo+1:ihi) */ zungqr_(&nh, &nh, &nh, &A(*ilo+1,*ilo+1), lda, &TAU(* ilo), &WORK(1), lwork, &iinfo); } return 0; /* End of ZUNGHR */ } /* zunghr_ */
/* Subroutine */ int cungrq_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *lwork, integer * info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CUNGRQ generates an M-by-N complex matrix Q with orthonormal rows, which is defined as the last M rows of a product of K elementary reflectors of order N Q = H(1)' H(2)' . . . H(k)' as returned by CGERQF. Arguments ========= M (input) INTEGER The number of rows of the matrix Q. M >= 0. N (input) INTEGER The number of columns of the matrix Q. N >= M. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. M >= K >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the (m-k+i)-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGERQF in the last k rows of its array argument A. On exit, the M-by-N matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). TAU (input) COMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGERQF. WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,M). For optimum performance LWORK >= M*NB, where NB is the optimal blocksize. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; /* Local variables */ static integer i, j, l, nbmin, iinfo; extern /* Subroutine */ int cungr2_(integer *, integer *, integer *, complex *, integer *, complex *, complex *, integer *); static integer ib, nb, ii, kk; extern /* Subroutine */ int clarfb_(char *, char *, char *, char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *); static integer nx; extern /* Subroutine */ int clarft_(char *, char *, integer *, integer *, complex *, integer *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, iws; #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; if (*m < 0) { *info = -1; } else if (*n < *m) { *info = -2; } else if (*k < 0 || *k > *m) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } else if (*lwork < max(1,*m)) { *info = -8; } if (*info != 0) { i__1 = -(*info); xerbla_("CUNGRQ", &i__1); return 0; } /* Quick return if possible */ if (*m <= 0) { WORK(1).r = 1.f, WORK(1).i = 0.f; return 0; } /* Determine the block size. */ nb = ilaenv_(&c__1, "CUNGRQ", " ", m, n, k, &c_n1, 6L, 1L); nbmin = 2; nx = 0; iws = *m; if (nb > 1 && nb < *k) { /* Determine when to cross over from blocked to unblocked code. Computing MAX */ i__1 = 0, i__2 = ilaenv_(&c__3, "CUNGRQ", " ", m, n, k, &c_n1, 6L, 1L) ; nx = max(i__1,i__2); if (nx < *k) { /* Determine if workspace is large enough for blocked co de. */ ldwork = *m; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: reduc e NB and determine the minimum value of NB. */ nb = *lwork / ldwork; /* Computing MAX */ i__1 = 2, i__2 = ilaenv_(&c__2, "CUNGRQ", " ", m, n, k, &c_n1, 6L, 1L); nbmin = max(i__1,i__2); } } } if (nb >= nbmin && nb < *k && nx < *k) { /* Use blocked code after the first block. The last kk rows are handled by the block method. Computing MIN */ i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb; kk = min(i__1,i__2); /* Set A(1:m-kk,n-kk+1:n) to zero. */ i__1 = *n; for (j = *n - kk + 1; j <= *n; ++j) { i__2 = *m - kk; for (i = 1; i <= *m-kk; ++i) { i__3 = i + j * a_dim1; A(i,j).r = 0.f, A(i,j).i = 0.f; /* L10: */ } /* L20: */ } } else { kk = 0; } /* Use unblocked code for the first or only block. */ i__1 = *m - kk; i__2 = *n - kk; i__3 = *k - kk; cungr2_(&i__1, &i__2, &i__3, &A(1,1), lda, &TAU(1), &WORK(1), &iinfo) ; if (kk > 0) { /* Use blocked code */ i__1 = *k; i__2 = nb; for (i = *k - kk + 1; nb < 0 ? i >= *k : i <= *k; i += nb) { /* Computing MIN */ i__3 = nb, i__4 = *k - i + 1; ib = min(i__3,i__4); ii = *m - *k + i; if (ii > 1) { /* Form the triangular factor of the block reflec tor H = H(i+ib-1) . . . H(i+1) H(i) */ i__3 = *n - *k + i + ib - 1; clarft_("Backward", "Rowwise", &i__3, &ib, &A(ii,1), lda, &TAU(i), &WORK(1), &ldwork); /* Apply H' to A(1:m-k+i-1,1:n-k+i+ib-1) from the right */ i__3 = ii - 1; i__4 = *n - *k + i + ib - 1; clarfb_("Right", "Conjugate transpose", "Backward", "Rowwise", &i__3, &i__4, &ib, &A(ii,1), lda, &WORK(1), & ldwork, &A(1,1), lda, &WORK(ib + 1), &ldwork); } /* Apply H' to columns 1:n-k+i+ib-1 of current block */ i__3 = *n - *k + i + ib - 1; cungr2_(&ib, &i__3, &ib, &A(ii,1), lda, &TAU(i), &WORK(1), &iinfo); /* Set columns n-k+i+ib:n of current block to zero */ i__3 = *n; for (l = *n - *k + i + ib; l <= *n; ++l) { i__4 = ii + ib - 1; for (j = ii; j <= ii+ib-1; ++j) { i__5 = j + l * a_dim1; A(j,l).r = 0.f, A(j,l).i = 0.f; /* L30: */ } /* L40: */ } /* L50: */ } } WORK(1).r = (real) iws, WORK(1).i = 0.f; return 0; /* End of CUNGRQ */ } /* cungrq_ */
/* Subroutine */ int sposvx_(char *fact, char *uplo, integer *n, integer * nrhs, real *a, integer *lda, real *af, integer *ldaf, char *equed, real *s, real *b, integer *ldb, real *x, integer *ldx, real *rcond, real *ferr, real *berr, real *work, integer *iwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to compute the solution to a real system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix and X and B are N-by-NRHS matrices. Error bounds on the solution and a condition estimate are also provided. Description =========== The following steps are performed: 1. If FACT = 'E', real scaling factors are computed to equilibrate the system: diag(S) * A * diag(S) * inv(diag(S)) * X = diag(S) * B Whether or not the system will be equilibrated depends on the scaling of the matrix A, but if equilibration is used, A is overwritten by diag(S)*A*diag(S) and B by diag(S)*B. 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to factor the matrix A (after equilibration if FACT = 'E') as A = U**T* U, if UPLO = 'U', or A = L * L**T, if UPLO = 'L', where U is an upper triangular matrix and L is a lower triangular matrix. 3. The factored form of A is used to estimate the condition number of the matrix A. If the reciprocal of the condition number is less than machine precision, steps 4-6 are skipped. 4. The system of equations is solved for X using the factored form of A. 5. Iterative refinement is applied to improve the computed solution matrix and calculate error bounds and backward error estimates for it. 6. If equilibration was used, the matrix X is premultiplied by diag(S) so that it solves the original system before equilibration. Arguments ========= FACT (input) CHARACTER*1 Specifies whether or not the factored form of the matrix A is supplied on entry, and if not, whether the matrix A should be equilibrated before it is factored. = 'F': On entry, AF contains the factored form of A. If EQUED = 'Y', the matrix A has been equilibrated with scaling factors given by S. A and AF will not be modified. = 'N': The matrix A will be copied to AF and factored. = 'E': The matrix A will be equilibrated if necessary, then copied to AF and factored. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The number of linear equations, i.e., the order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input/output) REAL array, dimension (LDA,N) On entry, the symmetric matrix A, except if FACT = 'F' and EQUED = 'Y', then A must contain the equilibrated matrix diag(S)*A*diag(S). If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. A is not modified if FACT = 'F' or 'N', or if FACT = 'E' and EQUED = 'N' on exit. On exit, if FACT = 'E' and EQUED = 'Y', A is overwritten by diag(S)*A*diag(S). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). AF (input or output) REAL array, dimension (LDAF,N) If FACT = 'F', then AF is an input argument and on entry contains the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, in the same storage format as A. If EQUED .ne. 'N', then AF is the factored form of the equilibrated matrix diag(S)*A*diag(S). If FACT = 'N', then AF is an output argument and on exit returns the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T of the original matrix A. If FACT = 'E', then AF is an output argument and on exit returns the triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T of the equilibrated matrix A (see the description of A for the form of the equilibrated matrix). LDAF (input) INTEGER The leading dimension of the array AF. LDAF >= max(1,N). EQUED (input or output) CHARACTER*1 Specifies the form of equilibration that was done. = 'N': No equilibration (always true if FACT = 'N'). = 'Y': Equilibration was done, i.e., A has been replaced by diag(S) * A * diag(S). EQUED is an input argument if FACT = 'F'; otherwise, it is an output argument. S (input or output) REAL array, dimension (N) The scale factors for A; not accessed if EQUED = 'N'. S is an input argument if FACT = 'F'; otherwise, S is an output argument. If FACT = 'F' and EQUED = 'Y', each element of S must be positive. B (input/output) REAL array, dimension (LDB,NRHS) On entry, the N-by-NRHS right hand side matrix B. On exit, if EQUED = 'N', B is not modified; if EQUED = 'Y', B is overwritten by diag(S) * B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (output) REAL array, dimension (LDX,NRHS) If INFO = 0, the N-by-NRHS solution matrix X to the original system of equations. Note that if EQUED = 'Y', A and B are modified on exit, and the solution to the equilibrated system is inv(diag(S))*X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). RCOND (output) REAL The estimate of the reciprocal condition number of the matrix A after equilibration (if done). If RCOND is less than the machine precision (in particular, if RCOND = 0), the matrix is singular to working precision. This condition is indicated by a return code of INFO > 0, and the solution and error bounds are not computed. FERR (output) REAL array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) REAL array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) REAL array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i, and i is <= N: the leading minor of order i of A is not positive definite, so the factorization could not be completed, and the solution and error bounds could not be computed. = N+1: RCOND is less than machine precision. The factorization has been completed, but the matrix is singular to working precision, and the solution and error bounds have not been computed. ===================================================================== Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2; real r__1, r__2; /* Local variables */ static real amax, smin, smax; static integer i, j; extern logical lsame_(char *, char *); static real scond, anorm; static logical equil, rcequ; extern doublereal slamch_(char *); static logical nofact; extern /* Subroutine */ int xerbla_(char *, integer *); static real bignum; static integer infequ; extern /* Subroutine */ int slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *), spocon_(char *, integer *, real *, integer *, real *, real *, real *, integer *, integer *); extern doublereal slansy_(char *, char *, integer *, real *, integer *, real *); static real smlnum; extern /* Subroutine */ int slaqsy_(char *, integer *, real *, integer *, real *, real *, real *, char *), spoequ_(integer * , real *, integer *, real *, real *, real *, integer *), sporfs_( char *, integer *, integer *, real *, integer *, real *, integer * , real *, integer *, real *, integer *, real *, real *, real *, integer *, integer *), spotrf_(char *, integer *, real *, integer *, integer *), spotrs_(char *, integer *, integer *, real *, integer *, real *, integer *, integer *); #define S(I) s[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; nofact = lsame_(fact, "N"); equil = lsame_(fact, "E"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rcequ = FALSE_; } else { rcequ = lsame_(equed, "Y"); smlnum = slamch_("Safe minimum"); bignum = 1.f / smlnum; } /* Test the input parameters. */ if (! nofact && ! equil && ! lsame_(fact, "F")) { *info = -1; } else if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*nrhs < 0) { *info = -4; } else if (*lda < max(1,*n)) { *info = -6; } else if (*ldaf < max(1,*n)) { *info = -8; } else if (lsame_(fact, "F") && ! (rcequ || lsame_(equed, "N"))) { *info = -9; } else { if (rcequ) { smin = bignum; smax = 0.f; i__1 = *n; for (j = 1; j <= *n; ++j) { /* Computing MIN */ r__1 = smin, r__2 = S(j); smin = dmin(r__1,r__2); /* Computing MAX */ r__1 = smax, r__2 = S(j); smax = dmax(r__1,r__2); /* L10: */ } if (smin <= 0.f) { *info = -10; } else if (*n > 0) { scond = dmax(smin,smlnum) / dmin(smax,bignum); } else { scond = 1.f; } } if (*info == 0) { if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } } } if (*info != 0) { i__1 = -(*info); xerbla_("SPOSVX", &i__1); return 0; } if (equil) { /* Compute row and column scalings to equilibrate the matrix A. */ spoequ_(n, &A(1,1), lda, &S(1), &scond, &amax, &infequ); if (infequ == 0) { /* Equilibrate the matrix. */ slaqsy_(uplo, n, &A(1,1), lda, &S(1), &scond, &amax, equed); rcequ = lsame_(equed, "Y"); } } /* Scale the right hand side. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { i__2 = *n; for (i = 1; i <= *n; ++i) { B(i,j) = S(i) * B(i,j); /* L20: */ } /* L30: */ } } if (nofact || equil) { /* Compute the Cholesky factorization A = U'*U or A = L*L'. */ slacpy_(uplo, n, n, &A(1,1), lda, &AF(1,1), ldaf); spotrf_(uplo, n, &AF(1,1), ldaf, info); /* Return if INFO is non-zero. */ if (*info != 0) { if (*info > 0) { *rcond = 0.f; } return 0; } } /* Compute the norm of the matrix A. */ anorm = slansy_("1", uplo, n, &A(1,1), lda, &WORK(1)); /* Compute the reciprocal of the condition number of A. */ spocon_(uplo, n, &AF(1,1), ldaf, &anorm, rcond, &WORK(1), &IWORK(1), info); /* Return if the matrix is singular to working precision. */ if (*rcond < slamch_("Epsilon")) { *info = *n + 1; return 0; } /* Compute the solution matrix X. */ slacpy_("Full", n, nrhs, &B(1,1), ldb, &X(1,1), ldx); spotrs_(uplo, n, nrhs, &AF(1,1), ldaf, &X(1,1), ldx, info); /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ sporfs_(uplo, n, nrhs, &A(1,1), lda, &AF(1,1), ldaf, &B(1,1), ldb, &X(1,1), ldx, &FERR(1), &BERR(1), &WORK(1), & IWORK(1), info); /* Transform the solution matrix X to a solution of the original system. */ if (rcequ) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { i__2 = *n; for (i = 1; i <= *n; ++i) { X(i,j) = S(i) * X(i,j); /* L40: */ } /* L50: */ } i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { FERR(j) /= scond; /* L60: */ } } return 0; /* End of SPOSVX */ } /* sposvx_ */
LONG DOUBLE qlange_(char *norm, int *m, int *n, LONG DOUBLE *a, int #endif *lda, LONG DOUBLE *work) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= DLANGE returns the value of the one norm, or the Frobenius norm, or the infinity norm, or the element of largest absolute value of a real matrix A. Description =========== DLANGE returns the value DLANGE = ( MAX(ABS(A(i,j))), NORM = 'M' or 'm' ( ( norm1(A), NORM = '1', 'O' or 'o' ( ( normI(A), NORM = 'I' or 'i' ( ( normF(A), NORM = 'F', 'f', 'E' or 'e' where norm1 denotes the one norm of a matrix (maximum column sum), normI denotes the infinity norm of a matrix (maximum row sum) and normF denotes the Frobenius norm of a matrix (square root of sum of squares). Note that MAX(ABS(A(i,j))) is not a matrix norm. Arguments ========= NORM (input) CHARACTER*1 Specifies the value to be returned in DLANGE as described above. M (input) INT The number of rows of the matrix A. M >= 0. When M = 0, DLANGE is set to zero. N (input) INT The number of columns of the matrix A. N >= 0. When N = 0, DLANGE is set to zero. A (input) LONG DOUBLE PRECISION array, dimension (LDA,N) The m by n matrix A. LDA (input) INT The leading dimension of the array A. LDA >= MAX(M,1). WORK (workspace) LONG DOUBLE PRECISION array, dimension (LWORK), where LWORK >= M when NORM = 'I'; otherwise, WORK is not referenced. ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static int c__1 = 1; /* System generated locals */ int i__1, i__2; LONG DOUBLE ret_val, d__1, d__2, d__3; /* Builtin functions */ /* Local variables */ static int i, j; static LONG DOUBLE scale; extern long int lsame_(char *, char *); static LONG DOUBLE value; #ifdef PETSC_PREFIX_SUFFIX extern /* Subroutine */ void dlassq_(int *, LONG DOUBLE *, int *, #endif #ifdef Q_C_PREFIX_SUFFIX extern /* Subroutine */ void qlassq(int *, LONG DOUBLE *, int *, #endif #ifdef Q_NORMAL_PREFIX_SUFFIX extern /* Subroutine */ void qlassq_(int *, LONG DOUBLE *, int *, #endif LONG DOUBLE *, LONG DOUBLE *); static LONG DOUBLE sum; #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] if (MIN(*m,*n) == 0) { value = 0.; } else if (lsame_(norm, "M")) { /* Find MAX(ABS(A(i,j))). */ value = 0.; i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = *m; for (i = 1; i <= *m; ++i) { /* Computing MAX */ d__2 = value, d__3 = (d__1 = A(i,j), ABS(d__1)); value = MAX(d__2,d__3); /* L10: */ } /* L20: */ } } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') { /* Find norm1(A). */ value = 0.; i__1 = *n; for (j = 1; j <= *n; ++j) { sum = 0.; i__2 = *m; for (i = 1; i <= *m; ++i) { sum += (d__1 = A(i,j), ABS(d__1)); /* L30: */ } value = MAX(value,sum); /* L40: */ } } else if (lsame_(norm, "I")) { /* Find normI(A). */ i__1 = *m; for (i = 1; i <= *m; ++i) { WORK(i) = 0.; /* L50: */ } i__1 = *n; for (j = 1; j <= *n; ++j) { i__2 = *m; for (i = 1; i <= *m; ++i) { WORK(i) += (d__1 = A(i,j), ABS(d__1)); /* L60: */ } /* L70: */ } value = 0.; i__1 = *m; for (i = 1; i <= *m; ++i) { /* Computing MAX */ d__1 = value, d__2 = WORK(i); value = MAX(d__1,d__2); /* L80: */ } } else if (lsame_(norm, "F") || lsame_(norm, "E")) { /* Find normF(A). */ scale = 0.; sum = 1.; i__1 = *n; for (j = 1; j <= *n; ++j) { #ifdef PETSC_PREFIX_SUFFIX dlassq_(m, &A(1,j), &c__1, &scale, &sum); #endif #ifdef Q_C_PREFIX_SUFFIX qlassq(m, &A(1,j), &c__1, &scale, &sum); #endif #ifdef Q_NORMAL_PREFIX_SUFFIX qlassq_(m, &A(1,j), &c__1, &scale, &sum); #endif /* L90: */ } value = scale * sqrt(sum); } ret_val = value; return ret_val; /* End of DLANGE */ } /* dlange_ */
/* Subroutine */ int zggglm_(integer *n, integer *m, integer *p, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublecomplex *d, doublecomplex *x, doublecomplex *y, doublecomplex * work, integer *lwork, integer *info) { /* -- LAPACK driver routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZGGGLM solves a general Gauss-Markov linear model (GLM) problem: minimize || y ||_2 subject to d = A*x + B*y x where A is an N-by-M matrix, B is an N-by-P matrix, and d is a given N-vector. It is assumed that M <= N <= M+P, and rank(A) = M and rank( A B ) = N. Under these assumptions, the constrained equation is always consistent, and there is a unique solution x and a minimal 2-norm solution y, which is obtained using a generalized QR factorization of A and B. In particular, if matrix B is square nonsingular, then the problem GLM is equivalent to the following weighted linear least squares problem minimize || inv(B)*(d-A*x) ||_2 x where inv(B) denotes the inverse of B. Arguments ========= N (input) INTEGER The number of rows of the matrices A and B. N >= 0. M (input) INTEGER The number of columns of the matrix A. 0 <= M <= N. P (input) INTEGER The number of columns of the matrix B. P >= N-M. A (input/output) COMPLEX*16 array, dimension (LDA,M) On entry, the N-by-M matrix A. On exit, A is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input/output) COMPLEX*16 array, dimension (LDB,P) On entry, the N-by-P matrix B. On exit, B is destroyed. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). D (input/output) COMPLEX*16 array, dimension (N) On entry, D is the left hand side of the GLM equation. On exit, D is destroyed. X (output) COMPLEX*16 array, dimension (M) Y (output) COMPLEX*16 array, dimension (P) On exit, X and Y are the solutions of the GLM problem. WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= max(1,N+M+P). For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, where NB is an upper bound for the optimal blocksizes for ZGEQRF, CGERQF, ZUNMQR and CUNMRQ. INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. =================================================================== Test the input parameters Parameter adjustments Function Body */ /* Table of constant values */ static doublecomplex c_b2 = {1.,0.}; static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4; doublereal d__1; doublecomplex z__1; /* Local variables */ static integer lopt, i; extern /* Subroutine */ int zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), ztrsv_(char *, char *, char *, integer *, doublecomplex *, integer *, doublecomplex *, integer *); static integer np; extern /* Subroutine */ int xerbla_(char *, integer *), zggqrf_( integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, integer *), zunmqr_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *), zunmrq_(char *, char *, integer *, integer *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, integer *); #define D(I) d[(I)-1] #define X(I) x[(I)-1] #define Y(I) y[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] *info = 0; np = min(*n,*p); if (*n < 0) { *info = -1; } else if (*m < 0 || *m > *n) { *info = -2; } else if (*p < 0 || *p < *n - *m) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else /* if(complicated condition) */ { /* Computing MAX */ i__1 = 1, i__2 = *n + *m + *p; if (*lwork < max(i__1,i__2)) { *info = -12; } } if (*info != 0) { i__1 = -(*info); xerbla_("ZGGGLM", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Compute the GQR factorization of matrices A and B: Q'*A = ( R11 ) M, Q'*B*Z' = ( T11 T12 ) M ( 0 ) N-M ( 0 T22 ) N-M M M+P-N N-M where R11 and T22 are upper triangular, and Q and Z are unitary. */ i__1 = *lwork - *m - np; zggqrf_(n, m, p, &A(1,1), lda, &WORK(1), &B(1,1), ldb, &WORK(*m + 1), &WORK(*m + np + 1), &i__1, info); i__1 = *m + np + 1; lopt = (integer) WORK(*m+np+1).r; /* Update left-hand-side vector d = Q'*d = ( d1 ) M ( d2 ) N-M */ i__1 = max(1,*n); i__2 = *lwork - *m - np; zunmqr_("Left", "Conjugate transpose", n, &c__1, m, &A(1,1), lda, & WORK(1), &D(1), &i__1, &WORK(*m + np + 1), &i__2, info); /* Computing MAX */ i__3 = *m + np + 1; i__1 = lopt, i__2 = (integer) WORK(*m+np+1).r; lopt = max(i__1,i__2); /* Solve T22*y2 = d2 for y2 */ i__1 = *n - *m; ztrsv_("Upper", "No transpose", "Non unit", &i__1, &B(*m+1,*m+*p-*n+1), ldb, &D(*m + 1), &c__1); i__1 = *n - *m; zcopy_(&i__1, &D(*m + 1), &c__1, &Y(*m + *p - *n + 1), &c__1); /* Set y1 = 0 */ i__1 = *m + *p - *n; for (i = 1; i <= *m+*p-*n; ++i) { i__2 = i; Y(i).r = 0., Y(i).i = 0.; /* L10: */ } /* Update d1 = d1 - T12*y2 */ i__1 = *n - *m; z__1.r = -1., z__1.i = 0.; zgemv_("No transpose", m, &i__1, &z__1, &B(1,*m+*p-*n+1), ldb, &Y(*m + *p - *n + 1), &c__1, &c_b2, &D(1), &c__1); /* Solve triangular system: R11*x = d1 */ ztrsv_("Upper", "No Transpose", "Non unit", m, &A(1,1), lda, &D(1), & c__1); /* Copy D to X */ zcopy_(m, &D(1), &c__1, &X(1), &c__1); /* Backward transformation y = Z'*y Computing MAX */ i__1 = 1, i__2 = *n - *p + 1; i__3 = max(1,*p); i__4 = *lwork - *m - np; zunmrq_("Left", "Conjugate transpose", p, &c__1, &np, &B(max(1,*n-*p+1),1), ldb, &WORK(*m + 1), &Y(1), &i__3, &WORK(*m + np + 1), & i__4, info); /* Computing MAX */ i__3 = *m + np + 1; i__1 = lopt, i__2 = (integer) WORK(*m+np+1).r; d__1 = (doublereal) max(i__1,i__2); WORK(1).r = d__1, WORK(1).i = 0.; return 0; /* End of ZGGGLM */ } /* zggglm_ */
/* Subroutine */ int cung2l_(integer *m, integer *n, integer *k, complex *a, integer *lda, complex *tau, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CUNG2L generates an m by n complex matrix Q with orthonormal columns, which is defined as the last n columns of a product of k elementary reflectors of order m Q = H(k) . . . H(2) H(1) as returned by CGEQLF. Arguments ========= M (input) INTEGER The number of rows of the matrix Q. M >= 0. N (input) INTEGER The number of columns of the matrix Q. M >= N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. N >= K >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the (n-k+i)-th column must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by CGEQLF in the last k columns of its array argument A. On exit, the m-by-n matrix Q. LDA (input) INTEGER The first dimension of the array A. LDA >= max(1,M). TAU (input) COMPLEX array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by CGEQLF. WORK (workspace) COMPLEX array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument has an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3; complex q__1; /* Local variables */ static integer i, j, l; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), clarf_(char *, integer *, integer *, complex *, integer *, complex *, complex *, integer *, complex *); static integer ii; extern /* Subroutine */ int xerbla_(char *, integer *); #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0 || *n > *m) { *info = -2; } else if (*k < 0 || *k > *n) { *info = -3; } else if (*lda < max(1,*m)) { *info = -5; } if (*info != 0) { i__1 = -(*info); xerbla_("CUNG2L", &i__1); return 0; } /* Quick return if possible */ if (*n <= 0) { return 0; } /* Initialise columns 1:n-k to columns of the unit matrix */ i__1 = *n - *k; for (j = 1; j <= *n-*k; ++j) { i__2 = *m; for (l = 1; l <= *m; ++l) { i__3 = l + j * a_dim1; A(l,j).r = 0.f, A(l,j).i = 0.f; /* L10: */ } i__2 = *m - *n + j + j * a_dim1; A(*m-*n+j,j).r = 1.f, A(*m-*n+j,j).i = 0.f; /* L20: */ } i__1 = *k; for (i = 1; i <= *k; ++i) { ii = *n - *k + i; /* Apply H(i) to A(1:m-k+i,1:n-k+i) from the left */ i__2 = *m - *n + ii + ii * a_dim1; A(*m-*n+ii,ii).r = 1.f, A(*m-*n+ii,ii).i = 0.f; i__2 = *m - *n + ii; i__3 = ii - 1; clarf_("Left", &i__2, &i__3, &A(1,ii), &c__1, &TAU(i), &A(1,1), lda, &WORK(1)); i__2 = *m - *n + ii - 1; i__3 = i; q__1.r = -(doublereal)TAU(i).r, q__1.i = -(doublereal)TAU(i).i; cscal_(&i__2, &q__1, &A(1,ii), &c__1); i__2 = *m - *n + ii + ii * a_dim1; i__3 = i; q__1.r = 1.f - TAU(i).r, q__1.i = 0.f - TAU(i).i; A(*m-*n+ii,ii).r = q__1.r, A(*m-*n+ii,ii).i = q__1.i; /* Set A(m-k+i+1:m,n-k+i) to zero */ i__2 = *m; for (l = *m - *n + ii + 1; l <= *m; ++l) { i__3 = l + ii * a_dim1; A(l,ii).r = 0.f, A(l,ii).i = 0.f; /* L30: */ } /* L40: */ } return 0; /* End of CUNG2L */ } /* cung2l_ */
/* Subroutine */ int sorml2_(char *side, char *trans, integer *m, integer *n, integer *k, real *a, integer *lda, real *tau, real *c, integer *ldc, real *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= SORML2 overwrites the general real m by n matrix C with Q * C if SIDE = 'L' and TRANS = 'N', or Q'* C if SIDE = 'L' and TRANS = 'T', or C * Q if SIDE = 'R' and TRANS = 'N', or C * Q' if SIDE = 'R' and TRANS = 'T', where Q is a real orthogonal matrix defined as the product of k elementary reflectors Q = H(k) . . . H(2) H(1) as returned by SGELQF. Q is of order m if SIDE = 'L' and of order n if SIDE = 'R'. Arguments ========= SIDE (input) CHARACTER*1 = 'L': apply Q or Q' from the Left = 'R': apply Q or Q' from the Right TRANS (input) CHARACTER*1 = 'N': apply Q (No transpose) = 'T': apply Q' (Transpose) M (input) INTEGER The number of rows of the matrix C. M >= 0. N (input) INTEGER The number of columns of the matrix C. N >= 0. K (input) INTEGER The number of elementary reflectors whose product defines the matrix Q. If SIDE = 'L', M >= K >= 0; if SIDE = 'R', N >= K >= 0. A (input) REAL array, dimension (LDA,M) if SIDE = 'L', (LDA,N) if SIDE = 'R' The i-th row must contain the vector which defines the elementary reflector H(i), for i = 1,2,...,k, as returned by SGELQF in the first k rows of its array argument A. A is modified by the routine but restored on exit. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,K). TAU (input) REAL array, dimension (K) TAU(i) must contain the scalar factor of the elementary reflector H(i), as returned by SGELQF. C (input/output) REAL array, dimension (LDC,N) On entry, the m by n matrix C. On exit, C is overwritten by Q*C or Q'*C or C*Q' or C*Q. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1,M). WORK (workspace) REAL array, dimension (N) if SIDE = 'L', (M) if SIDE = 'R' INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* System generated locals */ integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2; /* Local variables */ static logical left; static integer i; extern logical lsame_(char *, char *); extern /* Subroutine */ int slarf_(char *, integer *, integer *, real *, integer *, real *, real *, integer *, real *); static integer i1, i2, i3, ic, jc, mi, ni, nq; extern /* Subroutine */ int xerbla_(char *, integer *); static logical notran; static real aii; #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define C(I,J) c[(I)-1 + ((J)-1)* ( *ldc)] *info = 0; left = lsame_(side, "L"); notran = lsame_(trans, "N"); /* NQ is the order of Q */ if (left) { nq = *m; } else { nq = *n; } if (! left && ! lsame_(side, "R")) { *info = -1; } else if (! notran && ! lsame_(trans, "T")) { *info = -2; } else if (*m < 0) { *info = -3; } else if (*n < 0) { *info = -4; } else if (*k < 0 || *k > nq) { *info = -5; } else if (*lda < max(1,*k)) { *info = -7; } else if (*ldc < max(1,*m)) { *info = -10; } if (*info != 0) { i__1 = -(*info); xerbla_("SORML2", &i__1); return 0; } /* Quick return if possible */ if (*m == 0 || *n == 0 || *k == 0) { return 0; } if (left && notran || ! left && ! notran) { i1 = 1; i2 = *k; i3 = 1; } else { i1 = *k; i2 = 1; i3 = -1; } if (left) { ni = *n; jc = 1; } else { mi = *m; ic = 1; } i__1 = i2; i__2 = i3; for (i = i1; i3 < 0 ? i >= i2 : i <= i2; i += i3) { if (left) { /* H(i) is applied to C(i:m,1:n) */ mi = *m - i + 1; ic = i; } else { /* H(i) is applied to C(1:m,i:n) */ ni = *n - i + 1; jc = i; } /* Apply H(i) */ aii = A(i,i); A(i,i) = 1.f; slarf_(side, &mi, &ni, &A(i,i), lda, &TAU(i), &C(ic,jc), ldc, &WORK(1)); A(i,i) = aii; /* L10: */ } return 0; /* End of SORML2 */ } /* sorml2_ */
/* Subroutine */ int chetrd_(char *uplo, integer *n, complex *a, integer *lda, real *d, real *e, complex *tau, complex *work, integer *lwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CHETRD reduces a complex Hermitian matrix A to real symmetric tridiagonal form T by a unitary similarity transformation: Q**H * A * Q = T. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the Hermitian matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if UPLO = 'U', the diagonal and first superdiagonal of A are overwritten by the corresponding elements of the tridiagonal matrix T, and the elements above the first superdiagonal, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors; if UPLO = 'L', the diagonal and first subdiagonal of A are over- written by the corresponding elements of the tridiagonal matrix T, and the elements below the first subdiagonal, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors. See Further Details. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). D (output) REAL array, dimension (N) The diagonal elements of the tridiagonal matrix T: D(i) = A(i,i). E (output) REAL array, dimension (N-1) The off-diagonal elements of the tridiagonal matrix T: E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'. TAU (output) COMPLEX array, dimension (N-1) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace/output) COMPLEX array, dimension (LWORK) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= 1. For optimum performance LWORK >= N*NB, where NB is the optimal blocksize. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== If UPLO = 'U', the matrix Q is represented as a product of elementary reflectors Q = H(n-1) . . . H(2) H(1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in A(1:i-1,i+1), and tau in TAU(i). If UPLO = 'L', the matrix Q is represented as a product of elementary reflectors Q = H(1) H(2) . . . H(n-1). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i), and tau in TAU(i). The contents of A on exit are illustrated by the following examples with n = 5: if UPLO = 'U': if UPLO = 'L': ( d e v2 v3 v4 ) ( d ) ( d e v3 v4 ) ( e d ) ( d e v4 ) ( v1 e d ) ( d e ) ( v1 v2 e d ) ( d ) ( v1 v2 v3 e d ) where d and e denote diagonal and off-diagonal elements of T, and vi denotes an element of the vector defining H(i). ===================================================================== Test the input parameters Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static integer c__3 = 3; static integer c__2 = 2; static real c_b23 = 1.f; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; complex q__1; /* Local variables */ static integer i, j; extern logical lsame_(char *, char *); static integer nbmin, iinfo; static logical upper; extern /* Subroutine */ int chetd2_(char *, integer *, complex *, integer *, real *, real *, complex *, integer *), cher2k_(char *, char *, integer *, integer *, complex *, complex *, integer *, complex *, integer *, real *, complex *, integer *); static integer nb, kk, nx; extern /* Subroutine */ int clatrd_(char *, integer *, integer *, complex *, integer *, real *, complex *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer ldwork, iws; #define D(I) d[(I)-1] #define E(I) e[(I)-1] #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*n)) { *info = -4; } else if (*lwork < 1) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("CHETRD", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { WORK(1).r = 1.f, WORK(1).i = 0.f; return 0; } /* Determine the block size. */ nb = ilaenv_(&c__1, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L); nx = *n; iws = 1; if (nb > 1 && nb < *n) { /* Determine when to cross over from blocked to unblocked code (last block is always handled by unblocked code). Computing MAX */ i__1 = nb, i__2 = ilaenv_(&c__3, "CHETRD", uplo, n, &c_n1, &c_n1, & c_n1, 6L, 1L); nx = max(i__1,i__2); if (nx < *n) { /* Determine if workspace is large enough for blocked co de. */ ldwork = *n; iws = ldwork * nb; if (*lwork < iws) { /* Not enough workspace to use optimal NB: deter mine the minimum value of NB, and reduce NB or force us e of unblocked code by setting NX = N. Computing MAX */ i__1 = *lwork / ldwork; nb = max(i__1,1); nbmin = ilaenv_(&c__2, "CHETRD", uplo, n, &c_n1, &c_n1, &c_n1, 6L, 1L); if (nb < nbmin) { nx = *n; } } } else { nx = *n; } } else { nb = 1; } if (upper) { /* Reduce the upper triangle of A. Columns 1:kk are handled by the unblocked method. */ kk = *n - (*n - nx + nb - 1) / nb * nb; i__1 = kk + 1; i__2 = -nb; for (i = *n - nb + 1; -nb < 0 ? i >= kk+1 : i <= kk+1; i += -nb) { /* Reduce columns i:i+nb-1 to tridiagonal form and form the matrix W which is needed to update the unreduced part of the matrix */ i__3 = i + nb - 1; clatrd_(uplo, &i__3, &nb, &A(1,1), lda, &E(1), &TAU(1), & WORK(1), &ldwork); /* Update the unreduced submatrix A(1:i-1,1:i-1), using an update of the form: A := A - V*W' - W*V' */ i__3 = i - 1; q__1.r = -1.f, q__1.i = 0.f; cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &A(1,i), lda, &WORK(1), &ldwork, &c_b23, &A(1,1), lda); /* Copy superdiagonal elements back into A, and diagonal elements into D */ i__3 = i + nb - 1; for (j = i; j <= i+nb-1; ++j) { i__4 = j - 1 + j * a_dim1; i__5 = j - 1; A(j-1,j).r = E(j-1), A(j-1,j).i = 0.f; i__4 = j; i__5 = j + j * a_dim1; D(j) = A(j,j).r; /* L10: */ } /* L20: */ } /* Use unblocked code to reduce the last or only block */ chetd2_(uplo, &kk, &A(1,1), lda, &D(1), &E(1), &TAU(1), &iinfo); } else { /* Reduce the lower triangle of A */ i__2 = *n - nx; i__1 = nb; for (i = 1; nb < 0 ? i >= *n-nx : i <= *n-nx; i += nb) { /* Reduce columns i:i+nb-1 to tridiagonal form and form the matrix W which is needed to update the unreduced part of the matrix */ i__3 = *n - i + 1; clatrd_(uplo, &i__3, &nb, &A(i,i), lda, &E(i), &TAU(i), &WORK(1), &ldwork); /* Update the unreduced submatrix A(i+nb:n,i+nb:n), usin g an update of the form: A := A - V*W' - W*V' */ i__3 = *n - i - nb + 1; q__1.r = -1.f, q__1.i = 0.f; cher2k_(uplo, "No transpose", &i__3, &nb, &q__1, &A(i+nb,i), lda, &WORK(nb + 1), &ldwork, &c_b23, &A(i+nb,i+nb), lda); /* Copy subdiagonal elements back into A, and diagonal elements into D */ i__3 = i + nb - 1; for (j = i; j <= i+nb-1; ++j) { i__4 = j + 1 + j * a_dim1; i__5 = j; A(j+1,j).r = E(j), A(j+1,j).i = 0.f; i__4 = j; i__5 = j + j * a_dim1; D(j) = A(j,j).r; /* L30: */ } /* L40: */ } /* Use unblocked code to reduce the last or only block */ i__1 = *n - i + 1; chetd2_(uplo, &i__1, &A(i,i), lda, &D(i), &E(i), &TAU(i), & iinfo); } WORK(1).r = (real) iws, WORK(1).i = 0.f; return 0; /* End of CHETRD */ } /* chetrd_ */
/* Subroutine */ int zlatzm_(char *side, integer *m, integer *n, doublecomplex *v, integer *incv, doublecomplex *tau, doublecomplex * c1, doublecomplex *c2, integer *ldc, doublecomplex *work) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= ZLATZM applies a Householder matrix generated by ZTZRQF to a matrix. Let P = I - tau*u*u', u = ( 1 ), ( v ) where v is an (m-1) vector if SIDE = 'L', or a (n-1) vector if SIDE = 'R'. If SIDE equals 'L', let C = [ C1 ] 1 [ C2 ] m-1 n Then C is overwritten by P*C. If SIDE equals 'R', let C = [ C1, C2 ] m 1 n-1 Then C is overwritten by C*P. Arguments ========= SIDE (input) CHARACTER*1 = 'L': form P * C = 'R': form C * P M (input) INTEGER The number of rows of the matrix C. N (input) INTEGER The number of columns of the matrix C. V (input) COMPLEX*16 array, dimension (1 + (M-1)*abs(INCV)) if SIDE = 'L' (1 + (N-1)*abs(INCV)) if SIDE = 'R' The vector v in the representation of P. V is not used if TAU = 0. INCV (input) INTEGER The increment between elements of v. INCV <> 0 TAU (input) COMPLEX*16 The value tau in the representation of P. C1 (input/output) COMPLEX*16 array, dimension (LDC,N) if SIDE = 'L' (M,1) if SIDE = 'R' On entry, the n-vector C1 if SIDE = 'L', or the m-vector C1 if SIDE = 'R'. On exit, the first row of P*C if SIDE = 'L', or the first column of C*P if SIDE = 'R'. C2 (input/output) COMPLEX*16 array, dimension (LDC, N) if SIDE = 'L' (LDC, N-1) if SIDE = 'R' On entry, the (m - 1) x n matrix C2 if SIDE = 'L', or the m x (n - 1) matrix C2 if SIDE = 'R'. On exit, rows 2:m of P*C if SIDE = 'L', or columns 2:m of C*P if SIDE = 'R'. LDC (input) INTEGER The leading dimension of the arrays C1 and C2. LDC >= max(1,M). WORK (workspace) COMPLEX*16 array, dimension (N) if SIDE = 'L' (M) if SIDE = 'R' ===================================================================== Parameter adjustments Function Body */ /* Table of constant values */ static doublecomplex c_b1 = {1.,0.}; static integer c__1 = 1; /* System generated locals */ integer c1_dim1, c1_offset, c2_dim1, c2_offset, i__1; doublecomplex z__1; /* Local variables */ extern logical lsame_(char *, char *); extern /* Subroutine */ int zgerc_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *), zgemv_(char *, integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, doublecomplex *, integer *), zgeru_(integer *, integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *, doublecomplex *, integer *) , zcopy_(integer *, doublecomplex *, integer *, doublecomplex *, integer *), zaxpy_(integer *, doublecomplex *, doublecomplex *, integer *, doublecomplex *, integer *), zlacgv_(integer *, doublecomplex *, integer *); #define V(I) v[(I)-1] #define WORK(I) work[(I)-1] #define C2(I,J) c2[(I)-1 + ((J)-1)* ( *ldc)] #define C1(I,J) c1[(I)-1 + ((J)-1)* ( *ldc)] if (min(*m,*n) == 0 || tau->r == 0. && tau->i == 0.) { return 0; } if (lsame_(side, "L")) { /* w := conjg( C1 + v' * C2 ) */ zcopy_(n, &C1(1,1), ldc, &WORK(1), &c__1); zlacgv_(n, &WORK(1), &c__1); i__1 = *m - 1; zgemv_("Conjugate transpose", &i__1, n, &c_b1, &C2(1,1), ldc, & V(1), incv, &c_b1, &WORK(1), &c__1); /* [ C1 ] := [ C1 ] - tau* [ 1 ] * w' [ C2 ] [ C2 ] [ v ] */ zlacgv_(n, &WORK(1), &c__1); z__1.r = -tau->r, z__1.i = -tau->i; zaxpy_(n, &z__1, &WORK(1), &c__1, &C1(1,1), ldc); i__1 = *m - 1; z__1.r = -tau->r, z__1.i = -tau->i; zgeru_(&i__1, n, &z__1, &V(1), incv, &WORK(1), &c__1, &C2(1,1), ldc); } else if (lsame_(side, "R")) { /* w := C1 + C2 * v */ zcopy_(m, &C1(1,1), &c__1, &WORK(1), &c__1); i__1 = *n - 1; zgemv_("No transpose", m, &i__1, &c_b1, &C2(1,1), ldc, &V(1), incv, &c_b1, &WORK(1), &c__1); /* [ C1, C2 ] := [ C1, C2 ] - tau* w * [ 1 , v'] */ z__1.r = -tau->r, z__1.i = -tau->i; zaxpy_(m, &z__1, &WORK(1), &c__1, &C1(1,1), &c__1); i__1 = *n - 1; z__1.r = -tau->r, z__1.i = -tau->i; zgerc_(m, &i__1, &z__1, &WORK(1), &c__1, &V(1), incv, &C2(1,1), ldc); } return 0; /* End of ZLATZM */ } /* zlatzm_ */
/* Subroutine */ int cgeql2_(integer *m, integer *n, complex *a, integer *lda, complex *tau, complex *work, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= CGEQL2 computes a QL factorization of a complex m by n matrix A: A = Q * L. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. A (input/output) COMPLEX array, dimension (LDA,N) On entry, the m by n matrix A. On exit, if m >= n, the lower triangle of the subarray A(m-n+1:m,1:n) contains the n by n lower triangular matrix L; if m <= n, the elements on and below the (n-m)-th superdiagonal contain the m by n lower trapezoidal matrix L; the remaining elements, with the array TAU, represent the unitary matrix Q as a product of elementary reflectors (see Further Details). LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). TAU (output) COMPLEX array, dimension (min(M,N)) The scalar factors of the elementary reflectors (see Further Details). WORK (workspace) COMPLEX array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Further Details =============== The matrix Q is represented as a product of elementary reflectors Q = H(k) . . . H(2) H(1), where k = min(m,n). Each H(i) has the form H(i) = I - tau * v * v' where tau is a complex scalar, and v is a complex vector with v(m-k+i+1:m) = 0 and v(m-k+i) = 1; v(1:m-k+i-1) is stored on exit in A(1:m-k+i-1,n-k+i), and tau in TAU(i). ===================================================================== Test the input arguments Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer a_dim1, a_offset, i__1, i__2; complex q__1; /* Builtin functions */ void r_cnjg(complex *, complex *); /* Local variables */ static integer i, k; static complex alpha; extern /* Subroutine */ int clarf_(char *, integer *, integer *, complex * , integer *, complex *, complex *, integer *, complex *), clarfg_(integer *, complex *, complex *, integer *, complex *), xerbla_(char *, integer *); #define TAU(I) tau[(I)-1] #define WORK(I) work[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] *info = 0; if (*m < 0) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*lda < max(1,*m)) { *info = -4; } if (*info != 0) { i__1 = -(*info); xerbla_("CGEQL2", &i__1); return 0; } k = min(*m,*n); for (i = k; i >= 1; --i) { /* Generate elementary reflector H(i) to annihilate A(1:m-k+i-1,n-k+i) */ i__1 = *m - k + i + (*n - k + i) * a_dim1; alpha.r = A(*m-k+i,*n-k+i).r, alpha.i = A(*m-k+i,*n-k+i).i; i__1 = *m - k + i; clarfg_(&i__1, &alpha, &A(1,*n-k+i), &c__1, &TAU(i)); /* Apply H(i)' to A(1:m-k+i,1:n-k+i-1) from the left */ i__1 = *m - k + i + (*n - k + i) * a_dim1; A(*m-k+i,*n-k+i).r = 1.f, A(*m-k+i,*n-k+i).i = 0.f; i__1 = *m - k + i; i__2 = *n - k + i - 1; r_cnjg(&q__1, &TAU(i)); clarf_("Left", &i__1, &i__2, &A(1,*n-k+i), &c__1, & q__1, &A(1,1), lda, &WORK(1)); i__1 = *m - k + i + (*n - k + i) * a_dim1; A(*m-k+i,*n-k+i).r = alpha.r, A(*m-k+i,*n-k+i).i = alpha.i; /* L10: */ } return 0; /* End of CGEQL2 */ } /* cgeql2_ */
/* Subroutine */ int dsyrfs_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *af, integer *ldaf, integer * ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSYRFS improves the computed solution to a system of linear equations when the coefficient matrix is symmetric indefinite, and provides error bounds and backward error estimates for the solution. Arguments ========= UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). AF (input) DOUBLE PRECISION array, dimension (LDAF,N) The factored form of the matrix A. AF contains the block diagonal matrix D and the multipliers used to obtain the factor U or L from the factorization A = U*D*U**T or A = L*D*L**T as computed by DSYTRF. LDAF (input) INTEGER The leading dimension of the array AF. LDAF >= max(1,N). IPIV (input) INTEGER array, dimension (N) Details of the interchanges and the block structure of D as determined by DSYTRF. B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DSYTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. Parameter adjustments Function Body */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b12 = -1.; static doublereal c_b14 = 1.; /* System generated locals */ integer a_dim1, a_offset, af_dim1, af_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i, j, k; static doublereal s; extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count; static logical upper; extern /* Subroutine */ int dsymv_(char *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *); static doublereal lstres; extern /* Subroutine */ int dsytrs_(char *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static doublereal eps; #define IPIV(I) ipiv[(I)-1] #define FERR(I) ferr[(I)-1] #define BERR(I) berr[(I)-1] #define WORK(I) work[(I)-1] #define IWORK(I) iwork[(I)-1] #define A(I,J) a[(I)-1 + ((J)-1)* ( *lda)] #define AF(I,J) af[(I)-1 + ((J)-1)* ( *ldaf)] #define B(I,J) b[(I)-1 + ((J)-1)* ( *ldb)] #define X(I,J) x[(I)-1 + ((J)-1)* ( *ldx)] *info = 0; upper = lsame_(uplo, "U"); if (! upper && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldaf < max(1,*n)) { *info = -7; } else if (*ldb < max(1,*n)) { *info = -10; } else if (*ldx < max(1,*n)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { FERR(j) = 0.; BERR(j) = 0.; /* L10: */ } return 0; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 */ nz = *n + 1; eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= *nrhs; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - A * X */ dcopy_(n, &B(1,j), &c__1, &WORK(*n + 1), &c__1); dsymv_(uplo, n, &c_b12, &A(1,1), lda, &X(1,j), &c__1, &c_b14, &WORK(*n + 1), &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(A)*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matr ix or vector Z. If the i-th component of the denominator is le ss than SAFE2, then SAFE1 is added to the i-th components of th e numerator and denominator before dividing. */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(i) = (d__1 = B(i,j), abs(d__1)); /* L30: */ } /* Compute abs(A)*abs(X) + abs(B). */ if (upper) { i__2 = *n; for (k = 1; k <= *n; ++k) { s = 0.; xk = (d__1 = X(k,j), abs(d__1)); i__3 = k - 1; for (i = 1; i <= k-1; ++i) { WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk; s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2)); /* L40: */ } WORK(k) = WORK(k) + (d__1 = A(k,k), abs(d__1)) * xk + s; /* L50: */ } } else { i__2 = *n; for (k = 1; k <= *n; ++k) { s = 0.; xk = (d__1 = X(k,j), abs(d__1)); WORK(k) += (d__1 = A(k,k), abs(d__1)) * xk; i__3 = *n; for (i = k + 1; i <= *n; ++i) { WORK(i) += (d__1 = A(i,k), abs(d__1)) * xk; s += (d__1 = A(i,k), abs(d__1)) * (d__2 = X(i,j), abs(d__2)); /* L60: */ } WORK(k) += s; /* L70: */ } } s = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = WORK(*n + i), abs(d__1)) / WORK(i); s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = WORK(*n + i), abs(d__1)) + safe1) / (WORK(i) + safe1); s = max(d__2,d__3); } /* L80: */ } BERR(j) = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, a nd 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (BERR(j) > eps && BERR(j) * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK(*n + 1), n, info); daxpy_(n, &c_b14, &WORK(*n + 1), &c__1, &X(1,j), &c__1) ; lstres = BERR(j); ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(A))* ( abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(A) is the inverse of A abs(Z) is the componentwise absolute value of the matrix o r vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(A)*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(A)*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(A) * diag(W), where W = abs(R) + NZ*EPS*( abs(A)*abs(X)+abs(B) ))) */ i__2 = *n; for (i = 1; i <= *n; ++i) { if (WORK(i) > safe2) { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i); } else { WORK(i) = (d__1 = WORK(*n + i), abs(d__1)) + nz * eps * WORK( i) + safe1; } /* L90: */ } kase = 0; L100: dlacon_(n, &WORK((*n << 1) + 1), &WORK(*n + 1), &IWORK(1), &FERR(j), & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(A'). */ dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK( *n + 1), n, info); i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L110: */ } } else if (kase == 2) { /* Multiply by inv(A)*diag(W). */ i__2 = *n; for (i = 1; i <= *n; ++i) { WORK(*n + i) = WORK(i) * WORK(*n + i); /* L120: */ } dsytrs_(uplo, n, &c__1, &AF(1,1), ldaf, &IPIV(1), &WORK( *n + 1), n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i = 1; i <= *n; ++i) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = X(i,j), abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { FERR(j) /= lstres; } /* L140: */ } return 0; /* End of DSYRFS */ } /* dsyrfs_ */