/*
Cost function: 0.5 * || logLXL ||_F^2 + lambdaX * sum_i tr(X_i), where slice i of
logLXL is the matrix logarithm of (Ls_i^{-1} L_i)(Ls_i^{-1} L_i)^T.
L_i is slice i of the "XaL" temp data of x and Ls_i is slice i of the member Ls.
Side effects: registers "Xalpha" and "logLXL" as temp data on x.
*/
double SPDTensorDL::f(Variable *x) const
{
	const double *Bdata = x->ObtainReadData();

	/* Each slice of Xalpha is B alpha_j in [(6), CS15]. */
	SharedSpace *Xalpha = new SharedSpace(3, dim, dim, N);
	double *XalphaData = Xalpha->ObtainWriteEntireData();

	integer sliceLen = dim * dim;
	integer innerDim = num;
	integer sliceCount = N;

	/* Xalpha <-- \mathbb{B} alpha, written as one (dim*dim x num) times (num x N) product. */
	dgemm_(GLOBAL::N, GLOBAL::N, &sliceLen, &sliceCount, &innerDim, &GLOBAL::DONE,
		const_cast<double *> (Bdata), &sliceLen, alpha, &innerDim,
		&GLOBAL::DZERO, XalphaData, &sliceLen);
	x->AddToTempData("Xalpha", Xalpha);

	/* Compute the Cholesky decomposition of every slice of Xalpha. */
	/* NOTE(review): presumably this call is what attaches the "XaL" temp data read just below - confirm. */
	/* NOTE(review): the dynamic_cast result is dereferenced without a null check. */
	SPDTensor *tensorMani = dynamic_cast<SPDTensor *> (Domain);
	tensorMani->CholeskyRepresentation(x);

	const SharedSpace *cholShared = x->ObtainReadTempData("XaL");
	const double *cholX = cholShared->ObtainReadData();

	SharedSpace *SharedlogLXL = new SharedSpace(3, dim, dim, N);
	double *logLXL = SharedlogLXL->ObtainWriteEntireData();

	/* Scratch copy of one factor; dtrtrs_ overwrites its right-hand side in place. */
	double *scratch = new double[dim * dim];
	integer copyLen = dim * dim;
	integer ddim = dim;
	integer info;

	for (integer i = 0; i < N; i++)
	{
		double *outSlice = logLXL + ddim * ddim * i;

		dcopy_(&copyLen, const_cast<double *> (cholX) + i * copyLen, &GLOBAL::IONE, scratch, &GLOBAL::IONE);

		/* Solve the lower triangular system Ls X = Li, i.e., X = Ls^{-1} Li.
		The solution X replaces Li in scratch.
		Details: http://www.netlib.org/lapack/explore-html/d6/d6f/dtrtrs_8f.html */
		dtrtrs_(GLOBAL::L, GLOBAL::N, GLOBAL::N, &ddim, &ddim, Ls + dim * dim * i, &ddim, scratch, &ddim, &info);
		if (info != 0)
		{
			std::cout << "Warning: Solving linear system in SPDTensorDL::f failed with info:" << info << "!" << std::endl;
		}

		/* outSlice <-- X X^T, then replace it in place by its matrix logarithm. */
		dgemm_(GLOBAL::N, GLOBAL::T, &ddim, &ddim, &ddim, &GLOBAL::DONE, scratch, &ddim, scratch, &ddim,
			&GLOBAL::DZERO, outSlice, &ddim);
		Matrix MMt(outSlice, ddim, ddim);
		Matrix::LogSymmetricM(GLOBAL::L, MMt, MMt);
	}
	delete[] scratch;

	/* Frobenius norm over all N slices at once. */
	integer totalLen = dim * dim * N;
	double result = dnrm2_(&totalLen, logLXL, &GLOBAL::IONE);
	x->AddToTempData("logLXL", SharedlogLXL);
	result = result * result / 2.0;

	/* Add \Omega(X) = \sum \tr(X_i), weighted by lambdaX. */
	for (integer i = 0; i < num; i++)
	{
		const double *Bslice = Bdata + i * dim * dim;
		for (integer j = 0; j < dim; j++)
		{
			result += lambdaX * Bslice[j * dim + j];
		}
	}
	return result;
}
/*
 * Solve a triangular system of the form A * X = B or A^T * X = B.
 *
 * Forwards every argument by address to the LAPACK routine matching the
 * element type: dtrtrs_ when TH_REAL_IS_DOUBLE is defined, strtrs_ otherwise.
 * The LAPACK status is written to *info by the callee.
 * When the library was built without USE_LAPACK, this raises THError instead.
 */
void THLapack_(trtrs)(char uplo, char trans, char diag, int n, int nrhs, real *a, int lda, real *b, int ldb, int* info)
{
#if !defined(USE_LAPACK)
  THError("trtrs : Lapack library not found in compile time\n");
#elif defined(TH_REAL_IS_DOUBLE)
  dtrtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);
#else
  strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);
#endif
}
/*
 * Solve the upper triangular system u * x = b for a single right-hand side
 * by calling LAPACK dtrtrs_ ('U', no transpose, non-unit diagonal).
 *
 * u : matrix handle; its entries are read through mindex(i, j, u).
 * b : right-hand side vector, passed to dtrtrs_ as its B argument
 *     (dtrtrs_ stores the solution there).
 *
 * NOTE(review): the malloc result is not checked before it is written to.
 * NOTE(review): the status returned by dtrtrs_ is discarded, so a singular
 *               or otherwise rejected system is not reported to the caller.
 */
static void upper_tri_solve(mat u, double* b)
{
    char upper = 'U';
    char no_trans = 'N';
    char non_unit = 'N';
    int one_rhs = 1;
    int lead_a = u->r;
    int lead_b = u->c;
    int status;
    int row, col;
    double* packed = (double*) malloc(sizeof(double)*u->r*u->c);
    double* dst = packed;

    /* Copy u into a dense buffer column by column (column-major order). */
    for (col = 0; col < u->c; col++) {
        for (row = 0; row < u->r; row++) {
            *dst++ = u->m[mindex(row,col,u)];
        }
    }

    dtrtrs_(&upper, &no_trans, &non_unit, &u->c, &one_rhs, packed, &lead_a, b, &lead_b, &status);

    free(packed);
}
/* DGGGLM, f2c translation of the LAPACK 3.1 driver: solves a general */
/* Gauss-Markov linear model problem through a generalized QR factorization. */
/* Every exit path returns 0; the outcome is reported through *info. */
/* c__1, c_n1, c_b32 and c_b34 are file-scope constants defined outside this */
/* routine; min and max are macros supplied by the f2c headers. */
/* Subroutine */ int dggglm_(integer *n, integer *m, integer *p, doublereal *
	a, integer *lda, doublereal *b, integer *ldb, doublereal *d__,
	doublereal *x, doublereal *y, doublereal *work, integer *lwork,
	integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4;

    /* Local variables */
    integer i__, nb, np, nb1, nb2, nb3, nb4, lopt;
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
	    doublereal *, doublereal *, integer *, doublereal *, integer *,
	    doublereal *, doublereal *, integer *), dcopy_(integer *,
	    doublereal *, integer *, doublereal *, integer *), dggqrf_(
	    integer *, integer *, integer *, doublereal *, integer *,
	    doublereal *, doublereal *, integer *, doublereal *, doublereal *,
	    integer *, integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
	    integer *, integer *);
    integer lwkmin;
    extern /* Subroutine */ int dormqr_(char *, char *, integer *, integer *,
	    integer *, doublereal *, integer *, doublereal *, doublereal *,
	    integer *, doublereal *, integer *, integer *), dormrq_(char *,
	    char *, integer *, integer *, integer *, doublereal *, integer *,
	    doublereal *, doublereal *, integer *, doublereal *, integer *,
	    integer *);
    integer lwkopt;
    logical lquery;
    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *,
	    integer *, doublereal *, integer *, doublereal *, integer *,
	    integer *);


/*  -- LAPACK driver routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  DGGGLM solves a general Gauss-Markov linear model (GLM) problem: */

/*          minimize || y ||_2   subject to   d = A*x + B*y */
/*              x */

/*  where A is an N-by-M matrix, B is an N-by-P matrix, and d is a */
/*  given N-vector. It is assumed that M <= N <= M+P, and */

/*             rank(A) = M    and    rank( A B ) = N. */

/*  Under these assumptions, the constrained equation is always */
/*  consistent, and there is a unique solution x and a minimal 2-norm */
/*  solution y, which is obtained using a generalized QR factorization */
/*  of the matrices (A, B) given by */

/*     A = Q*(R),   B = Q*T*Z. */
/*           (0) */

/*  In particular, if matrix B is square nonsingular, then the problem */
/*  GLM is equivalent to the following weighted linear least squares */
/*  problem */

/*               minimize || inv(B)*(d-A*x) ||_2 */
/*                   x */

/*  where inv(B) denotes the inverse of B. */

/*  Arguments */
/*  ========= */

/*  N       (input) INTEGER */
/*          The number of rows of the matrices A and B.  N >= 0. */

/*  M       (input) INTEGER */
/*          The number of columns of the matrix A.  0 <= M <= N. */

/*  P       (input) INTEGER */
/*          The number of columns of the matrix B.  P >= N-M. */

/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,M) */
/*          On entry, the N-by-M matrix A. */
/*          On exit, the upper triangular part of the array A contains */
/*          the M-by-M upper triangular matrix R. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A. LDA >= max(1,N). */

/*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,P) */
/*          On entry, the N-by-P matrix B. */
/*          On exit, if N <= P, the upper triangle of the subarray */
/*          B(1:N,P-N+1:P) contains the N-by-N upper triangular matrix T; */
/*          if N > P, the elements on and above the (N-P)th subdiagonal */
/*          contain the N-by-P upper trapezoidal matrix T. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= max(1,N). */

/*  D       (input/output) DOUBLE PRECISION array, dimension (N) */
/*          On entry, D is the left hand side of the GLM equation. */
/*          On exit, D is destroyed. */

/*  X       (output) DOUBLE PRECISION array, dimension (M) */
/*  Y       (output) DOUBLE PRECISION array, dimension (P) */
/*          On exit, X and Y are the solutions of the GLM problem. */

/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. LWORK >= max(1,N+M+P). */
/*          For optimum performance, LWORK >= M+min(N,P)+max(N,P)*NB, */
/*          where NB is an upper bound for the optimal blocksizes for */
/*          DGEQRF, DGERQF, DORMQR and DORMRQ (the four routine names */
/*          queried through ILAENV in the workspace calculation below). */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          = 1:  the upper triangular factor R associated with A in the */
/*                generalized QR factorization of the pair (A, B) is */
/*                singular, so that rank(A) < M; the least squares */
/*                solution could not be computed. */
/*          = 2:  the bottom (N-M) by (N-M) part of the upper trapezoidal */
/*                factor T associated with B in the generalized QR */
/*                factorization of the pair (A, B) is singular, so that */
/*                rank( A B ) < N; the least squares solution could not */
/*                be computed. */

/*          NOTE(review): the executable code below assigns INFO = 1 when */
/*          the T22 solve fails and INFO = 2 when the R11 solve fails, */
/*          which is the reverse of the two meanings listed above. Confirm */
/*          against upstream LAPACK before relying on the distinction. */

/*  =================================================================== */

/*     Test the input parameters */

    /* Parameter adjustments */
    /* Shift each pointer so the Fortran-style 1-based subscripts used */
    /* below (a[i + j*a_dim1], d__[i], work[i], ...) address the same */
    /* storage as the caller's 0-based arrays. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --d__;
    --x;
    --y;
    --work;

    /* Function Body */
    *info = 0;
    np = min(*n,*p);
    lquery = *lwork == -1;
    /* Negative INFO values are argument positions: 1 = N, 2 = M, 3 = P, */
    /* 5 = LDA, 7 = LDB and (further down) 12 = LWORK. */
    if (*n < 0) {
	*info = -1;
    } else if (*m < 0 || *m > *n) {
	*info = -2;
    } else if (*p < 0 || *p < *n - *m) {
	*info = -3;
    } else if (*lda < max(1,*n)) {
	*info = -5;
    } else if (*ldb < max(1,*n)) {
	*info = -7;
    }

/*     Calculate workspace */

    if (*info == 0) {
	if (*n == 0) {
	    lwkmin = 1;
	    lwkopt = 1;
	} else {
	    nb1 = ilaenv_(&c__1, "DGEQRF", " ", n, m, &c_n1, &c_n1);
	    nb2 = ilaenv_(&c__1, "DGERQF", " ", n, m, &c_n1, &c_n1);
	    nb3 = ilaenv_(&c__1, "DORMQR", " ", n, m, p, &c_n1);
	    nb4 = ilaenv_(&c__1, "DORMRQ", " ", n, m, p, &c_n1);
/* Computing MAX */
	    i__1 = max(nb1,nb2), i__1 = max(i__1,nb3);
	    nb = max(i__1,nb4);
	    lwkmin = *m + *n + *p;
	    lwkopt = *m + np + max(*n,*p) * nb;
	}
	/* Report the optimal size; this is the only output of a workspace query. */
	work[1] = (doublereal) lwkopt;

	if (*lwork < lwkmin && ! lquery) {
	    *info = -12;
	}
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGGGLM", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

    if (*n == 0) {
	return 0;
    }

/*     Compute the GQR factorization of matrices A and B: */

/*            Q'*A = ( R11 ) M,    Q'*B*Z' = ( T11   T12 ) M */
/*                   (  0  ) N-M             (  0    T22 ) N-M */
/*                      M                     M+P-N  N-M */

/*     where R11 and T22 are upper triangular, and Q and Z are */
/*     orthogonal. */

    /* WORK layout used from here on: work[1..m] and work[m+1..m+np] are */
    /* passed to dggqrf_ as its two scalar-factor arrays, and the */
    /* remainder starting at work[m+np+1] is scratch space. */
    i__1 = *lwork - *m - np;
    dggqrf_(n, m, p, &a[a_offset], lda, &work[1], &b[b_offset], ldb, &work[*m
	    + 1], &work[*m + np + 1], &i__1, info);
    lopt = (integer) work[*m + np + 1];

/*     Update left-hand-side vector d = Q'*d = ( d1 ) M */
/*                                             ( d2 ) N-M */

    i__1 = max(1,*n);
    i__2 = *lwork - *m - np;
    dormqr_("Left", "Transpose", n, &c__1, m, &a[a_offset], lda, &work[1], &
	    d__[1], &i__1, &work[*m + np + 1], &i__2, info);
/* Computing MAX */
    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
    lopt = max(i__1,i__2);

/*     Solve T22*y2 = d2 for y2 */

    if (*n > *m) {
	i__1 = *n - *m;
	i__2 = *n - *m;
	dtrtrs_("Upper", "No transpose", "Non unit", &i__1, &c__1, &b[*m + 1
		+ (*m + *p - *n + 1) * b_dim1], ldb, &d__[*m + 1], &i__2,
		info);

	/* A positive status from dtrtrs_ is collapsed to the fixed code 1. */
	if (*info > 0) {
	    *info = 1;
	    return 0;
	}

	i__1 = *n - *m;
	dcopy_(&i__1, &d__[*m + 1], &c__1, &y[*m + *p - *n + 1], &c__1);
    }

/*     Set y1 = 0 */

    i__1 = *m + *p - *n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	y[i__] = 0.;
/* L10: */
    }

/*     Update d1 = d1 - T12*y2 */

    /* c_b32 and c_b34 are the alpha and beta scalars of this dgemv_ call; */
    /* presumably -1. and 1. to match the formula above - TODO confirm */
    /* against the file-scope definitions. */
    i__1 = *n - *m;
    dgemv_("No transpose", m, &i__1, &c_b32, &b[(*m + *p - *n + 1) * b_dim1 +
	    1], ldb, &y[*m + *p - *n + 1], &c__1, &c_b34, &d__[1], &c__1);

/*     Solve triangular system: R11*x = d1 */

    if (*m > 0) {
	dtrtrs_("Upper", "No Transpose", "Non unit", m, &c__1, &a[a_offset],
		lda, &d__[1], m, info);

	/* A positive status from dtrtrs_ is collapsed to the fixed code 2. */
	if (*info > 0) {
	    *info = 2;
	    return 0;
	}

/*        Copy D to X */

	dcopy_(m, &d__[1], &c__1, &x[1], &c__1);
    }

/*     Backward transformation y = Z'*y */

/* Computing MAX */
    i__1 = 1, i__2 = *n - *p + 1;
    i__3 = max(1,*p);
    i__4 = *lwork - *m - np;
    dormrq_("Left", "Transpose", p, &c__1, &np, &b[max(i__1, i__2)+ b_dim1],
	    ldb, &work[*m + 1], &y[1], &i__3, &work[*m + np + 1], &i__4, info);
/* Computing MAX */
    /* Report the largest workspace size actually requested by the helpers. */
    i__1 = lopt, i__2 = (integer) work[*m + np + 1];
    work[1] = (doublereal) (*m + np + max(i__1,i__2));

    return 0;

/*     End of DGGGLM */

} /* dggglm_ */
/* DGELS, f2c translation of the LAPACK 3.1 driver: solves full-rank */
/* over- or underdetermined linear systems with a QR or LQ factorization. */
/* Every exit path returns 0; the outcome is reported through *info. */
/* c__0, c__1, c_n1 and c_b33 are file-scope constants defined outside this */
/* routine; min, max, TRUE_ and FALSE_ come from the f2c headers. */
/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
	nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb,
	doublereal *work, integer *lwork, integer *info)
{
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2;

    /* Local variables */
    integer i__, j, nb, mn;
    doublereal anrm, bnrm;
    integer brow;
    logical tpsd;
    integer iascl, ibscl;
    extern logical lsame_(char *, char *);
    integer wsize;
    doublereal rwork[1];
    extern /* Subroutine */ int dlabad_(doublereal *, doublereal *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *,
	    doublereal *, integer *, doublereal *);
    extern /* Subroutine */ int dgelqf_(integer *, integer *, doublereal *,
	    integer *, doublereal *, doublereal *, integer *, integer *),
	    dlascl_(char *, integer *, integer *, doublereal *, doublereal *,
	    integer *, integer *, doublereal *, integer *, integer *),
	    dgeqrf_(integer *, integer *, doublereal *, integer *,
	    doublereal *, doublereal *, integer *, integer *), dlaset_(char *,
	    integer *, integer *, doublereal *, doublereal *, doublereal *,
	    integer *), xerbla_(char *, integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *,
	    integer *, integer *);
    integer scllen;
    doublereal bignum;
    extern /* Subroutine */ int dormlq_(char *, char *, integer *, integer *,
	    integer *, doublereal *, integer *, doublereal *, doublereal *,
	    integer *, doublereal *, integer *, integer *), dormqr_(char *,
	    char *, integer *, integer *, integer *, doublereal *, integer *,
	    doublereal *, doublereal *, integer *, doublereal *, integer *,
	    integer *);
    doublereal smlnum;
    logical lquery;
    extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *,
	    integer *, doublereal *, integer *, doublereal *, integer *,
	    integer *);


/*  -- LAPACK driver routine (version 3.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  DGELS solves overdetermined or underdetermined real linear systems */
/*  involving an M-by-N matrix A, or its transpose, using a QR or LQ */
/*  factorization of A.  It is assumed that A has full rank. */

/*  The following options are provided: */

/*  1. If TRANS = 'N' and m >= n:  find the least squares solution of */
/*     an overdetermined system, i.e., solve the least squares problem */
/*                  minimize || B - A*X ||. */

/*  2. If TRANS = 'N' and m < n:  find the minimum norm solution of */
/*     an underdetermined system A * X = B. */

/*  3. If TRANS = 'T' and m >= n:  find the minimum norm solution of */
/*     an underdetermined system A**T * X = B. */

/*  4. If TRANS = 'T' and m < n:  find the least squares solution of */
/*     an overdetermined system, i.e., solve the least squares problem */
/*                  minimize || B - A**T * X ||. */

/*  Several right hand side vectors b and solution vectors x can be */
/*  handled in a single call; they are stored as the columns of the */
/*  M-by-NRHS right hand side matrix B and the N-by-NRHS solution */
/*  matrix X. */

/*  Arguments */
/*  ========= */

/*  TRANS   (input) CHARACTER*1 */
/*          = 'N': the linear system involves A; */
/*          = 'T': the linear system involves A**T. */

/*  M       (input) INTEGER */
/*          The number of rows of the matrix A.  M >= 0. */

/*  N       (input) INTEGER */
/*          The number of columns of the matrix A.  N >= 0. */

/*  NRHS    (input) INTEGER */
/*          The number of right hand sides, i.e., the number of */
/*          columns of the matrices B and X. NRHS >=0. */

/*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
/*          On entry, the M-by-N matrix A. */
/*          On exit, */
/*            if M >= N, A is overwritten by details of its QR */
/*                       factorization as returned by DGEQRF; */
/*            if M <  N, A is overwritten by details of its LQ */
/*                       factorization as returned by DGELQF. */

/*  LDA     (input) INTEGER */
/*          The leading dimension of the array A.  LDA >= max(1,M). */

/*  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */
/*          On entry, the matrix B of right hand side vectors, stored */
/*          columnwise; B is M-by-NRHS if TRANS = 'N', or N-by-NRHS */
/*          if TRANS = 'T'. */
/*          On exit, if INFO = 0, B is overwritten by the solution */
/*          vectors, stored columnwise: */
/*          if TRANS = 'N' and m >= n, rows 1 to n of B contain the least */
/*          squares solution vectors; the residual sum of squares for the */
/*          solution in each column is given by the sum of squares of */
/*          elements N+1 to M in that column; */
/*          if TRANS = 'N' and m < n, rows 1 to N of B contain the */
/*          minimum norm solution vectors; */
/*          if TRANS = 'T' and m >= n, rows 1 to M of B contain the */
/*          minimum norm solution vectors; */
/*          if TRANS = 'T' and m < n, rows 1 to M of B contain the */
/*          least squares solution vectors; the residual sum of squares */
/*          for the solution in each column is given by the sum of */
/*          squares of elements M+1 to N in that column. */

/*  LDB     (input) INTEGER */
/*          The leading dimension of the array B. LDB >= MAX(1,M,N). */

/*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) */
/*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */

/*  LWORK   (input) INTEGER */
/*          The dimension of the array WORK. */
/*          LWORK >= max( 1, MN + max( MN, NRHS ) ). */
/*          For optimal performance, */
/*          LWORK >= max( 1, MN + max( MN, NRHS )*NB ). */
/*          where MN = min(M,N) and NB is the optimum block size. */

/*          If LWORK = -1, then a workspace query is assumed; the routine */
/*          only calculates the optimal size of the WORK array, returns */
/*          this value as the first entry of the WORK array, and no error */
/*          message related to LWORK is issued by XERBLA. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value */
/*          > 0:  if INFO =  i, the i-th diagonal element of the */
/*                triangular factor of A is zero, so that A does not have */
/*                full rank; the least squares solution could not be */
/*                computed. */

/*  ===================================================================== */

/*     Test the input arguments. */

    /* Parameter adjustments */
    /* Shift each pointer so the Fortran-style 1-based subscripts used */
    /* below (b[i + j*b_dim1], work[i], ...) address the same storage as */
    /* the caller's 0-based arrays. */
    a_dim1 = *lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1;
    b -= b_offset;
    --work;

    /* Function Body */
    *info = 0;
    mn = min(*m,*n);
    lquery = *lwork == -1;
    /* Negative INFO values are argument positions: 1 = TRANS, 2 = M, */
    /* 3 = N, 4 = NRHS, 6 = LDA, 8 = LDB, 10 = LWORK. */
    if (! (lsame_(trans, "N") || lsame_(trans, "T"))) {
	*info = -1;
    } else if (*m < 0) {
	*info = -2;
    } else if (*n < 0) {
	*info = -3;
    } else if (*nrhs < 0) {
	*info = -4;
    } else if (*lda < max(1,*m)) {
	*info = -6;
    } else /* if(complicated condition) */ {
/* Computing MAX */
	i__1 = max(1,*m);
	if (*ldb < max(i__1,*n)) {
	    *info = -8;
	} else /* if(complicated condition) */ {
/* Computing MAX */
	    i__1 = 1, i__2 = mn + max(mn,*nrhs);
	    if (*lwork < max(i__1,i__2) && ! lquery) {
		*info = -10;
	    }
	}
    }

/*     Figure out optimal block size */

    /* This also runs when only LWORK was rejected (INFO = -10), so that */
    /* wsize is computed and stored in work[1] before the error return. */
    if (*info == 0 || *info == -10) {

	/* tpsd is true exactly when TRANS is not 'N' (transposed problem). */
	tpsd = TRUE_;
	if (lsame_(trans, "N")) {
	    tpsd = FALSE_;
	}

	if (*m >= *n) {
	    nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1);
	    if (tpsd) {
/* Computing MAX */
		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LN", m, nrhs, n, &
			c_n1);
		nb = max(i__1,i__2);
	    } else {
/* Computing MAX */
		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMQR", "LT", m, nrhs, n, &
			c_n1);
		nb = max(i__1,i__2);
	    }
	} else {
	    nb = ilaenv_(&c__1, "DGELQF", " ", m, n, &c_n1, &c_n1);
	    if (tpsd) {
/* Computing MAX */
		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LT", n, nrhs, m, &
			c_n1);
		nb = max(i__1,i__2);
	    } else {
/* Computing MAX */
		i__1 = nb, i__2 = ilaenv_(&c__1, "DORMLQ", "LN", n, nrhs, m, &
			c_n1);
		nb = max(i__1,i__2);
	    }
	}

/* Computing MAX */
	i__1 = 1, i__2 = mn + max(mn,*nrhs) * nb;
	wsize = max(i__1,i__2);
	work[1] = (doublereal) wsize;

    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DGELS ", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Quick return if possible */

/* Computing MIN */
    i__1 = min(*m,*n);
    if (min(i__1,*nrhs) == 0) {
	i__1 = max(*m,*n);
	dlaset_("Full", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
	return 0;
    }

/*     Get machine parameters */

    smlnum = dlamch_("S") / dlamch_("P");
    bignum = 1. / smlnum;
    dlabad_(&smlnum, &bignum);

/*     Scale A, B if max element outside range [SMLNUM,BIGNUM] */

    /* iascl and ibscl record which scaling was applied (0 = none, */
    /* 1 = scaled up to SMLNUM, 2 = scaled down to BIGNUM) so that it can */
    /* be reversed on the solution at the end. */
    anrm = dlange_("M", m, n, &a[a_offset], lda, rwork);
    iascl = 0;
    if (anrm > 0. && anrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, m, n, &a[a_offset], lda,
		info);
	iascl = 1;
    } else if (anrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	dlascl_("G", &c__0, &c__0, &anrm, &bignum, m, n, &a[a_offset], lda,
		info);
	iascl = 2;
    } else if (anrm == 0.) {

/*        Matrix all zero. Return zero solution. */

	i__1 = max(*m,*n);
	dlaset_("F", &i__1, nrhs, &c_b33, &c_b33, &b[b_offset], ldb);
	goto L50;
    }

    /* Number of rows of B that hold right-hand-side data on entry. */
    brow = *m;
    if (tpsd) {
	brow = *n;
    }
    bnrm = dlange_("M", &brow, nrhs, &b[b_offset], ldb, rwork);
    ibscl = 0;
    if (bnrm > 0. && bnrm < smlnum) {

/*        Scale matrix norm up to SMLNUM */

	dlascl_("G", &c__0, &c__0, &bnrm, &smlnum, &brow, nrhs, &b[b_offset],
		ldb, info);
	ibscl = 1;
    } else if (bnrm > bignum) {

/*        Scale matrix norm down to BIGNUM */

	dlascl_("G", &c__0, &c__0, &bnrm, &bignum, &brow, nrhs, &b[b_offset],
		ldb, info);
	ibscl = 2;
    }

    /* In all four branches below, a positive status from dtrtrs_ causes */
    /* an immediate return: the "Undo scaling" step at the end is skipped */
    /* and B is left as it stands at that point. */
    if (*m >= *n) {

/*        compute QR factorization of A */

	i__1 = *lwork - mn;
	dgeqrf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
		;

/*        workspace at least N, optimally N*NB */

	if (! tpsd) {

/*           Least-Squares Problem min || A * X - B || */

/*           B(1:M,1:NRHS) := Q' * B(1:M,1:NRHS) */

	    i__1 = *lwork - mn;
	    dormqr_("Left", "Transpose", m, nrhs, n, &a[a_offset], lda, &work[
		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

/*           workspace at least NRHS, optimally NRHS*NB */

/*           B(1:N,1:NRHS) := inv(R) * B(1:N,1:NRHS) */

	    dtrtrs_("Upper", "No transpose", "Non-unit", n, nrhs, &a[a_offset]
		    , lda, &b[b_offset], ldb, info);

	    if (*info > 0) {
		return 0;
	    }

	    scllen = *n;

	} else {

/*           Underdetermined system of equations A' * X = B */
/*           (case 3 of the Purpose section: minimum norm solution) */

/*           B(1:N,1:NRHS) := inv(R') * B(1:N,1:NRHS) */

	    dtrtrs_("Upper", "Transpose", "Non-unit", n, nrhs, &a[a_offset],
		    lda, &b[b_offset], ldb, info);

	    if (*info > 0) {
		return 0;
	    }

/*           B(N+1:M,1:NRHS) = ZERO */

	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *m;
		for (i__ = *n + 1; i__ <= i__2; ++i__) {
		    b[i__ + j * b_dim1] = 0.;
/* L10: */
		}
/* L20: */
	    }

/*           B(1:M,1:NRHS) := Q(1:N,:) * B(1:N,1:NRHS) */

	    i__1 = *lwork - mn;
	    dormqr_("Left", "No transpose", m, nrhs, n, &a[a_offset], lda, &
		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

/*           workspace at least NRHS, optimally NRHS*NB */

	    scllen = *m;

	}

    } else {

/*        Compute LQ factorization of A */

	i__1 = *lwork - mn;
	dgelqf_(m, n, &a[a_offset], lda, &work[1], &work[mn + 1], &i__1, info)
		;

/*        workspace at least M, optimally M*NB. */

	if (! tpsd) {

/*           underdetermined system of equations A * X = B */

/*           B(1:M,1:NRHS) := inv(L) * B(1:M,1:NRHS) */

	    dtrtrs_("Lower", "No transpose", "Non-unit", m, nrhs, &a[a_offset]
		    , lda, &b[b_offset], ldb, info);

	    if (*info > 0) {
		return 0;
	    }

/*           B(M+1:N,1:NRHS) = 0 */

	    i__1 = *nrhs;
	    for (j = 1; j <= i__1; ++j) {
		i__2 = *n;
		for (i__ = *m + 1; i__ <= i__2; ++i__) {
		    b[i__ + j * b_dim1] = 0.;
/* L30: */
		}
/* L40: */
	    }

/*           B(1:N,1:NRHS) := Q(1:N,:)' * B(1:M,1:NRHS) */

	    i__1 = *lwork - mn;
	    dormlq_("Left", "Transpose", n, nrhs, m, &a[a_offset], lda, &work[
		    1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

/*           workspace at least NRHS, optimally NRHS*NB */

	    scllen = *n;

	} else {

/*           overdetermined system min || A' * X - B || */

/*           B(1:N,1:NRHS) := Q * B(1:N,1:NRHS) */

	    i__1 = *lwork - mn;
	    dormlq_("Left", "No transpose", n, nrhs, m, &a[a_offset], lda, &
		    work[1], &b[b_offset], ldb, &work[mn + 1], &i__1, info);

/*           workspace at least NRHS, optimally NRHS*NB */

/*           B(1:M,1:NRHS) := inv(L') * B(1:M,1:NRHS) */

	    dtrtrs_("Lower", "Transpose", "Non-unit", m, nrhs, &a[a_offset],
		    lda, &b[b_offset], ldb, info);

	    if (*info > 0) {
		return 0;
	    }

	    scllen = *m;

	}

    }

/*     Undo scaling */

    /* scllen is the number of solution rows in B set by the branch taken above. */
    if (iascl == 1) {
	dlascl_("G", &c__0, &c__0, &anrm, &smlnum, &scllen, nrhs, &b[b_offset]
		, ldb, info);
    } else if (iascl == 2) {
	dlascl_("G", &c__0, &c__0, &anrm, &bignum, &scllen, nrhs, &b[b_offset]
		, ldb, info);
    }
    if (ibscl == 1) {
	dlascl_("G", &c__0, &c__0, &smlnum, &bnrm, &scllen, nrhs, &b[b_offset]
		, ldb, info);
    } else if (ibscl == 2) {
	dlascl_("G", &c__0, &c__0, &bignum, &bnrm, &scllen, nrhs, &b[b_offset]
		, ldb, info);
    }

    /* Common exit (also reached directly when A is all zero): report the */
    /* optimal workspace size. */
L50:
    work[1] = (doublereal) wsize;

    return 0;

/*     End of DGELS */

} /* dgels_ */
void SPDTensorDL::EucGrad(Variable *x, Vector *gf) const { const SharedSpace *SharedlogLXL = x->ObtainReadTempData("logLXL"); const double *logLXL = SharedlogLXL->ObtainReadData(); double *Log_Ainv_X_Xinv = new double[dim * dim * N]; integer ddim = dim, info; const SharedSpace *SharedL = x->ObtainReadTempData("XaL"); const double *Lx = SharedL->ObtainReadData(); for (integer i = 0; i < N; i++) { /*tmp <-- log(Li^{-1} Xi Li^{-T}) Li^T */ dgemm_(GLOBAL::N, GLOBAL::T, &ddim, &ddim, &ddim, &GLOBAL::DONE, const_cast<double *> (logLXL + ddim * ddim * i), &ddim, Ls + dim * dim * i, &ddim, &GLOBAL::DZERO, Log_Ainv_X_Xinv + dim * dim * i, &ddim); /*Solve the linear system Li^T X = tmp, i.e., X = Li^{-T} log(Li^{-1} X Li^{-T}) Li^T. The solution X is stored in tmp. Note that Li is a lower triangular matrix. Details: http://www.netlib.org/lapack/explore-html/d6/d6f/dtrtrs_8f.html */ dtrtrs_(GLOBAL::L, GLOBAL::T, GLOBAL::N, &ddim, &ddim, Ls + dim * dim * i, &ddim, Log_Ainv_X_Xinv + dim * dim * i, &ddim, &info); if (info != 0) { std::cout << "The cholesky decompsotion in SPDTensorDL::EucGrad failed with info:" << info << "!" << std::endl; } for (integer j = 0; j < dim; j++) { double swaptmp = 0; for (integer k = j; k < dim; k++) { swaptmp = (Log_Ainv_X_Xinv + dim * dim * i)[k + j * dim]; (Log_Ainv_X_Xinv + dim * dim * i)[k + j * dim] = (Log_Ainv_X_Xinv + dim * dim * i)[j + k * dim]; (Log_Ainv_X_Xinv + dim * dim * i)[j + k * dim] = swaptmp; } } /*Solve the linear system X Lx^T = tmp, i.e., X = Li^{-T} log(Li^{-1} X Li^{-T}) Li^T Lx^{-T}. The solution X is stored in tmp. We solve Lx X^t = tmp^T instead. Details: http://www.netlib.org/lapack/explore-html/d6/d6f/dtrtrs_8f.html */ dtrtrs_(GLOBAL::L, GLOBAL::N, GLOBAL::N, &ddim, &ddim, const_cast<double *> (Lx)+dim * dim * i, &ddim, Log_Ainv_X_Xinv + dim * dim * i, &ddim, &info); /*Solve the linear system X Lx = tmp, i.e., X = Li^{-T} log(Li^{-1} X Li^{-T}) Li^T Lx^{-T} Lx^{-1}. The solution X is stored in tmp. 
We can solve system Lx^T X^T = tmp^T. Since the Euclidean gradient is symmetric, we can solve Lx^T X = tmp^T instead. Details: http://www.netlib.org/lapack/explore-html/d6/d6f/dtrtrs_8f.html */ dtrtrs_(GLOBAL::L, GLOBAL::T, GLOBAL::N, &ddim, &ddim, const_cast<double *> (Lx)+dim * dim * i, &ddim, Log_Ainv_X_Xinv + dim * dim * i, &ddim, &info); } double *gfVT = gf->ObtainWriteEntireData(); integer dd = dim * dim, nnum = num, NN = N; dgemm_(GLOBAL::N, GLOBAL::T, &dd, &nnum, &NN, &GLOBAL::DONE, Log_Ainv_X_Xinv, &dd, alpha, &nnum, &GLOBAL::DZERO, gfVT, &dd); delete[] Log_Ainv_X_Xinv; /*add \nabla (\Omega(X))_i = I*/ for (integer i = 0; i < num; i++) { for (integer j = 0; j < dim; j++) { gfVT[i * dim * dim + j * dim + j] += lambdaX; } } };
/* Subroutine */ int dchktr_(logical *dotype, integer *nn, integer *nval, integer *nnb, integer *nbval, integer *nns, integer *nsval, doublereal *thresh, logical *tsterr, integer *nmax, doublereal *a, doublereal *ainv, doublereal *b, doublereal *x, doublereal *xact, doublereal *work, doublereal *rwork, integer *iwork, integer *nout) { /* Initialized data */ static integer iseedy[4] = { 1988,1989,1990,1991 }; static char uplos[1*2] = "U" "L"; static char transs[1*3] = "N" "T" "C"; /* Format strings */ static char fmt_9999[] = "(\002 UPLO='\002,a1,\002', DIAG='\002,a1,\002'" ", N=\002,i5,\002, NB=\002,i4,\002, type \002,i2,\002, test(\002," "i2,\002)= \002,g12.5)"; static char fmt_9998[] = "(\002 UPLO='\002,a1,\002', TRANS='\002,a1,\002" "', DIAG='\002,a1,\002', N=\002,i5,\002, NB=\002,i4,\002, type" " \002,i2,\002, test(\002,i2,\002)= \002,g12" ".5)"; static char fmt_9997[] = "(\002 NORM='\002,a1,\002', UPLO ='\002,a1,\002" "', N=\002,i5,\002,\002,11x,\002 type \002,i2,\002, test(\002,i2" ",\002)=\002,g12.5)"; static char fmt_9996[] = "(1x,a6,\002( '\002,a1,\002', '\002,a1,\002', " "'\002,a1,\002', '\002,a1,\002',\002,i5,\002, ... 
), type \002,i2," "\002, test(\002,i2,\002)=\002,g12.5)"; /* System generated locals */ address a__1[2], a__2[3], a__3[4]; integer i__1, i__2, i__3[2], i__4, i__5[3], i__6[4]; char ch__1[2], ch__2[3], ch__3[4]; /* Builtin functions */ /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen), s_cat(char *, char **, integer *, integer *, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ integer i__, k, n, nb, in, lda, inb; char diag[1]; integer imat, info; char path[3]; integer irhs, nrhs; char norm[1], uplo[1]; integer nrun; extern /* Subroutine */ int alahd_(integer *, char *); integer idiag; doublereal scale; extern /* Subroutine */ int dget04_(integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *); integer nfail, iseed[4]; extern logical lsame_(char *, char *); doublereal rcond, anorm; integer itran; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dtrt01_(char *, char *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *), dtrt02_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dtrt03_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *), dtrt05_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *), dtrt06_( doublereal *, doublereal *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *); char trans[1]; integer iuplo, nerrs; doublereal dummy; char xtype[1]; extern /* Subroutine */ int alaerh_(char *, char *, integer *, integer *, char *, integer *, integer *, integer 
*, integer *, integer *, integer *, integer *, integer *, integer *); doublereal rcondc; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dlarhs_(char *, char *, char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *); doublereal rcondi; extern /* Subroutine */ int alasum_(char *, integer *, integer *, integer *, integer *); doublereal rcondo; extern doublereal dlantr_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal ainvnm; extern /* Subroutine */ int dlatrs_(char *, char *, char *, char *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *), dlattr_( integer *, char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtrcon_(char *, char *, char *, integer * , doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), xlaenv_(integer *, integer *), derrtr_(char *, integer *), dtrrfs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, doublereal *, integer *, integer *), dtrtri_(char *, char *, integer *, doublereal *, integer *, integer *); doublereal result[9]; extern /* Subroutine */ int dtrtrs_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); /* Fortran I/O blocks */ static cilist io___27 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9996, 0 }; /* -- LAPACK test routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. 
*/ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DCHKTR tests DTRTRI, -TRS, -RFS, and -CON, and DLATRS */ /* Arguments */ /* ========= */ /* DOTYPE (input) LOGICAL array, dimension (NTYPES) */ /* The matrix types to be used for testing. Matrices of type j */ /* (for 1 <= j <= NTYPES) are used for testing if DOTYPE(j) = */ /* .TRUE.; if DOTYPE(j) = .FALSE., then type j is not used. */ /* NN (input) INTEGER */ /* The number of values of N contained in the vector NVAL. */ /* NVAL (input) INTEGER array, dimension (NN) */ /* The values of the matrix column dimension N. */ /* NNB (input) INTEGER */ /* The number of values of NB contained in the vector NBVAL. */ /* NBVAL (input) INTEGER array, dimension (NNB) */ /* The values of the blocksize NB. */ /* NNS (input) INTEGER */ /* The number of values of NRHS contained in the vector NSVAL. */ /* NSVAL (input) INTEGER array, dimension (NNS) */ /* The values of the number of right hand sides NRHS. */ /* THRESH (input) DOUBLE PRECISION */ /* The threshold value for the test ratios. A result is */ /* included in the output file if RESULT >= THRESH. To have */ /* every test ratio printed, use THRESH = 0. */ /* TSTERR (input) LOGICAL */ /* Flag that indicates whether error exits are to be tested. */ /* NMAX (input) INTEGER */ /* The leading dimension of the work arrays. */ /* NMAX >= the maximum value of N in NVAL. */ /* A (workspace) DOUBLE PRECISION array, dimension (NMAX*NMAX) */ /* AINV (workspace) DOUBLE PRECISION array, dimension (NMAX*NMAX) */ /* B (workspace) DOUBLE PRECISION array, dimension (NMAX*NSMAX) */ /* where NSMAX is the largest entry in NSVAL. 
*/ /* X (workspace) DOUBLE PRECISION array, dimension (NMAX*NSMAX) */ /* XACT (workspace) DOUBLE PRECISION array, dimension (NMAX*NSMAX) */ /* WORK (workspace) DOUBLE PRECISION array, dimension */ /* (NMAX*max(3,NSMAX)) */ /* RWORK (workspace) DOUBLE PRECISION array, dimension */ /* (max(NMAX,2*NSMAX)) */ /* IWORK (workspace) INTEGER array, dimension (NMAX) */ /* NOUT (input) INTEGER */ /* The unit number for output. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. Local Arrays .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Scalars in Common .. */ /* .. */ /* .. Common blocks .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Data statements .. */ /* Parameter adjustments */ --iwork; --rwork; --work; --xact; --x; --b; --ainv; --a; --nsval; --nbval; --nval; --dotype; /* Function Body */ /* .. */ /* .. Executable Statements .. */ /* Initialize constants and the random number seed. */ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "TR", (ftnlen)2, (ftnlen)2); nrun = 0; nfail = 0; nerrs = 0; for (i__ = 1; i__ <= 4; ++i__) { iseed[i__ - 1] = iseedy[i__ - 1]; /* L10: */ } /* Test the error exits */ if (*tsterr) { derrtr_(path, nout); } infoc_1.infot = 0; xlaenv_(&c__2, &c__2); i__1 = *nn; for (in = 1; in <= i__1; ++in) { /* Do for each value of N in NVAL */ n = nval[in]; lda = max(1,n); *(unsigned char *)xtype = 'N'; for (imat = 1; imat <= 10; ++imat) { /* Do the tests only if DOTYPE( IMAT ) is true. */ if (! dotype[imat]) { goto L80; } for (iuplo = 1; iuplo <= 2; ++iuplo) { /* Do first for UPLO = 'U', then for UPLO = 'L' */ *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; /* Call DLATTR to generate a triangular test matrix. 
*/ s_copy(srnamc_1.srnamt, "DLATTR", (ftnlen)6, (ftnlen)6); dlattr_(&imat, uplo, "No transpose", diag, iseed, &n, &a[1], & lda, &x[1], &work[1], &info); /* Set IDIAG = 1 for non-unit matrices, 2 for unit. */ if (lsame_(diag, "N")) { idiag = 1; } else { idiag = 2; } i__2 = *nnb; for (inb = 1; inb <= i__2; ++inb) { /* Do for each blocksize in NBVAL */ nb = nbval[inb]; xlaenv_(&c__1, &nb); /* + TEST 1 */ /* Form the inverse of A. */ dlacpy_(uplo, &n, &n, &a[1], &lda, &ainv[1], &lda); s_copy(srnamc_1.srnamt, "DTRTRI", (ftnlen)6, (ftnlen)6); dtrtri_(uplo, diag, &n, &ainv[1], &lda, &info); /* Check error code from DTRTRI. */ if (info != 0) { /* Writing concatenation */ i__3[0] = 1, a__1[0] = uplo; i__3[1] = 1, a__1[1] = diag; s_cat(ch__1, a__1, i__3, &c__2, (ftnlen)2); alaerh_(path, "DTRTRI", &info, &c__0, ch__1, &n, &n, & c_n1, &c_n1, &nb, &imat, &nfail, &nerrs, nout); } /* Compute the infinity-norm condition number of A. */ anorm = dlantr_("I", uplo, diag, &n, &n, &a[1], &lda, & rwork[1]); ainvnm = dlantr_("I", uplo, diag, &n, &n, &ainv[1], &lda, &rwork[1]); if (anorm <= 0. || ainvnm <= 0.) { rcondi = 1.; } else { rcondi = 1. / anorm / ainvnm; } /* Compute the residual for the triangular matrix times */ /* its inverse. Also compute the 1-norm condition number */ /* of A. */ dtrt01_(uplo, diag, &n, &a[1], &lda, &ainv[1], &lda, & rcondo, &rwork[1], result); /* Print the test ratio if it is .GE. THRESH. */ if (result[0] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___27.ciunit = *nout; s_wsfe(&io___27); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&nb, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&imat, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__1, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[0], (ftnlen)sizeof( doublereal)); e_wsfe(); ++nfail; } ++nrun; /* Skip remaining tests if not the first block size. 
*/ if (inb != 1) { goto L60; } i__4 = *nns; for (irhs = 1; irhs <= i__4; ++irhs) { nrhs = nsval[irhs]; *(unsigned char *)xtype = 'N'; for (itran = 1; itran <= 3; ++itran) { /* Do for op(A) = A, A**T, or A**H. */ *(unsigned char *)trans = *(unsigned char *)& transs[itran - 1]; if (itran == 1) { *(unsigned char *)norm = 'O'; rcondc = rcondo; } else { *(unsigned char *)norm = 'I'; rcondc = rcondi; } /* + TEST 2 */ /* Solve and compute residual for op(A)*x = b. */ s_copy(srnamc_1.srnamt, "DLARHS", (ftnlen)6, ( ftnlen)6); dlarhs_(path, xtype, uplo, trans, &n, &n, &c__0, & idiag, &nrhs, &a[1], &lda, &xact[1], &lda, &b[1], &lda, iseed, &info); *(unsigned char *)xtype = 'C'; dlacpy_("Full", &n, &nrhs, &b[1], &lda, &x[1], & lda); s_copy(srnamc_1.srnamt, "DTRTRS", (ftnlen)6, ( ftnlen)6); dtrtrs_(uplo, trans, diag, &n, &nrhs, &a[1], &lda, &x[1], &lda, &info); /* Check error code from DTRTRS. */ if (info != 0) { /* Writing concatenation */ i__5[0] = 1, a__2[0] = uplo; i__5[1] = 1, a__2[1] = trans; i__5[2] = 1, a__2[2] = diag; s_cat(ch__2, a__2, i__5, &c__3, (ftnlen)3); alaerh_(path, "DTRTRS", &info, &c__0, ch__2, & n, &n, &c_n1, &c_n1, &nrhs, &imat, & nfail, &nerrs, nout); } /* This line is needed on a Sun SPARCstation. */ if (n > 0) { dummy = a[1]; } dtrt02_(uplo, trans, diag, &n, &nrhs, &a[1], &lda, &x[1], &lda, &b[1], &lda, &work[1], & result[1]); /* + TEST 3 */ /* Check solution from generated exact solution. */ dget04_(&n, &nrhs, &x[1], &lda, &xact[1], &lda, & rcondc, &result[2]); /* + TESTS 4, 5, and 6 */ /* Use iterative refinement to improve the solution */ /* and compute error bounds. */ s_copy(srnamc_1.srnamt, "DTRRFS", (ftnlen)6, ( ftnlen)6); dtrrfs_(uplo, trans, diag, &n, &nrhs, &a[1], &lda, &b[1], &lda, &x[1], &lda, &rwork[1], & rwork[nrhs + 1], &work[1], &iwork[1], & info); /* Check error code from DTRRFS. 
*/ if (info != 0) { /* Writing concatenation */ i__5[0] = 1, a__2[0] = uplo; i__5[1] = 1, a__2[1] = trans; i__5[2] = 1, a__2[2] = diag; s_cat(ch__2, a__2, i__5, &c__3, (ftnlen)3); alaerh_(path, "DTRRFS", &info, &c__0, ch__2, & n, &n, &c_n1, &c_n1, &nrhs, &imat, & nfail, &nerrs, nout); } dget04_(&n, &nrhs, &x[1], &lda, &xact[1], &lda, & rcondc, &result[3]); dtrt05_(uplo, trans, diag, &n, &nrhs, &a[1], &lda, &b[1], &lda, &x[1], &lda, &xact[1], &lda, &rwork[1], &rwork[nrhs + 1], &result[4]); /* Print information about the tests that did not */ /* pass the threshold. */ for (k = 2; k <= 6; ++k) { if (result[k - 1] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___36.ciunit = *nout; s_wsfe(&io___36); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&nrhs, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&imat, (ftnlen) sizeof(integer)); do_fio(&c__1, (char *)&k, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&result[k - 1], ( ftnlen)sizeof(doublereal)); e_wsfe(); ++nfail; } /* L20: */ } nrun += 5; /* L30: */ } /* L40: */ } /* + TEST 7 */ /* Get an estimate of RCOND = 1/CNDNUM. */ for (itran = 1; itran <= 2; ++itran) { if (itran == 1) { *(unsigned char *)norm = 'O'; rcondc = rcondo; } else { *(unsigned char *)norm = 'I'; rcondc = rcondi; } s_copy(srnamc_1.srnamt, "DTRCON", (ftnlen)6, (ftnlen) 6); dtrcon_(norm, uplo, diag, &n, &a[1], &lda, &rcond, & work[1], &iwork[1], &info); /* Check error code from DTRCON. */ if (info != 0) { /* Writing concatenation */ i__5[0] = 1, a__2[0] = norm; i__5[1] = 1, a__2[1] = uplo; i__5[2] = 1, a__2[2] = diag; s_cat(ch__2, a__2, i__5, &c__3, (ftnlen)3); alaerh_(path, "DTRCON", &info, &c__0, ch__2, &n, & n, &c_n1, &c_n1, &c_n1, &imat, &nfail, & nerrs, nout); } dtrt06_(&rcond, &rcondc, uplo, diag, &n, &a[1], &lda, &rwork[1], &result[6]); /* Print the test ratio if it is .GE. THRESH. 
*/ if (result[6] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___38.ciunit = *nout; s_wsfe(&io___38); do_fio(&c__1, norm, (ftnlen)1); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)) ; do_fio(&c__1, (char *)&imat, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&c__7, (ftnlen)sizeof( integer)); do_fio(&c__1, (char *)&result[6], (ftnlen)sizeof( doublereal)); e_wsfe(); ++nfail; } ++nrun; /* L50: */ } L60: ; } /* L70: */ } L80: ; } /* Use pathological test matrices to test DLATRS. */ for (imat = 11; imat <= 18; ++imat) { /* Do the tests only if DOTYPE( IMAT ) is true. */ if (! dotype[imat]) { goto L110; } for (iuplo = 1; iuplo <= 2; ++iuplo) { /* Do first for UPLO = 'U', then for UPLO = 'L' */ *(unsigned char *)uplo = *(unsigned char *)&uplos[iuplo - 1]; for (itran = 1; itran <= 3; ++itran) { /* Do for op(A) = A, A**T, and A**H. */ *(unsigned char *)trans = *(unsigned char *)&transs[itran - 1]; /* Call DLATTR to generate a triangular test matrix. */ s_copy(srnamc_1.srnamt, "DLATTR", (ftnlen)6, (ftnlen)6); dlattr_(&imat, uplo, trans, diag, iseed, &n, &a[1], &lda, &x[1], &work[1], &info); /* + TEST 8 */ /* Solve the system op(A)*x = b. */ s_copy(srnamc_1.srnamt, "DLATRS", (ftnlen)6, (ftnlen)6); dcopy_(&n, &x[1], &c__1, &b[1], &c__1); dlatrs_(uplo, trans, diag, "N", &n, &a[1], &lda, &b[1], & scale, &rwork[1], &info); /* Check error code from DLATRS. */ if (info != 0) { /* Writing concatenation */ i__6[0] = 1, a__3[0] = uplo; i__6[1] = 1, a__3[1] = trans; i__6[2] = 1, a__3[2] = diag; i__6[3] = 1, a__3[3] = "N"; s_cat(ch__3, a__3, i__6, &c__4, (ftnlen)4); alaerh_(path, "DLATRS", &info, &c__0, ch__3, &n, &n, & c_n1, &c_n1, &c_n1, &imat, &nfail, &nerrs, nout); } dtrt03_(uplo, trans, diag, &n, &c__1, &a[1], &lda, &scale, &rwork[1], &c_b101, &b[1], &lda, &x[1], &lda, & work[1], &result[7]); /* + TEST 9 */ /* Solve op(A)*X = b again with NORMIN = 'Y'. 
*/ dcopy_(&n, &x[1], &c__1, &b[n + 1], &c__1); dlatrs_(uplo, trans, diag, "Y", &n, &a[1], &lda, &b[n + 1] , &scale, &rwork[1], &info); /* Check error code from DLATRS. */ if (info != 0) { /* Writing concatenation */ i__6[0] = 1, a__3[0] = uplo; i__6[1] = 1, a__3[1] = trans; i__6[2] = 1, a__3[2] = diag; i__6[3] = 1, a__3[3] = "Y"; s_cat(ch__3, a__3, i__6, &c__4, (ftnlen)4); alaerh_(path, "DLATRS", &info, &c__0, ch__3, &n, &n, & c_n1, &c_n1, &c_n1, &imat, &nfail, &nerrs, nout); } dtrt03_(uplo, trans, diag, &n, &c__1, &a[1], &lda, &scale, &rwork[1], &c_b101, &b[n + 1], &lda, &x[1], &lda, &work[1], &result[8]); /* Print information about the tests that did not pass */ /* the threshold. */ if (result[7] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___40.ciunit = *nout; s_wsfe(&io___40); do_fio(&c__1, "DLATRS", (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, "N", (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&imat, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__8, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[7], (ftnlen)sizeof( doublereal)); e_wsfe(); ++nfail; } if (result[8] >= *thresh) { if (nfail == 0 && nerrs == 0) { alahd_(nout, path); } io___41.ciunit = *nout; s_wsfe(&io___41); do_fio(&c__1, "DLATRS", (ftnlen)6); do_fio(&c__1, uplo, (ftnlen)1); do_fio(&c__1, trans, (ftnlen)1); do_fio(&c__1, diag, (ftnlen)1); do_fio(&c__1, "Y", (ftnlen)1); do_fio(&c__1, (char *)&n, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&imat, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&c__9, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&result[8], (ftnlen)sizeof( doublereal)); e_wsfe(); ++nfail; } nrun += 2; /* L90: */ } /* L100: */ } L110: ; } /* L120: */ } /* Print a summary of the results. */ alasum_(path, nout, &nfail, &nrun, &nerrs); return 0; /* End of DCHKTR */ } /* dchktr_ */
/*
 * By-value convenience wrapper around dtrtrs_.
 *
 * dtrtrs_ takes every argument by address; this wrapper accepts the scalar
 * arguments by value, forwards the addresses of its own copies, and hands
 * back the status code that dtrtrs_ writes to its last argument.
 *
 *   uplo, trans, diag  single-character option flags, forwarded unchanged
 *   n, nrhs            size arguments, forwarded unchanged
 *   A, lda             first array argument and its leading dimension
 *   B, ldb             second array argument and its leading dimension
 *                      (presumably the right-hand sides, overwritten with
 *                      the solution, as in the LAPACK DTRTRS interface --
 *                      confirm against the linked library)
 *
 * Returns the status reported by dtrtrs_; the local is preset to 0 before
 * the call.
 */
int trtrs(char uplo, char trans, char diag, int n, int nrhs,
          double* A, int lda, double* B, int ldb)
{
    int status = 0;

    dtrtrs_(&uplo, &trans, &diag,
            &n, &nrhs,
            A, &lda,
            B, &ldb,
            &status);

    return status;
}
/* Subroutine */ int dgglse_(integer *m, integer *n, integer *p, doublereal *
	a, integer *lda, doublereal *b, integer *ldb, doublereal *c__,
	doublereal *d__, doublereal *x, doublereal *work, integer *lwork,
	integer *info)
{
/*  -- LAPACK driver routine (version 3.2) -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  DGGLSE solves the linear equality-constrained least squares (LSE) */
/*  problem */

/*          minimize || c - A*x ||_2   subject to   B*x = d */

/*  with A an M-by-N matrix, B a P-by-N matrix, c an M-vector and d a */
/*  P-vector.  It is assumed that P <= N <= M+P, rank(B) = P and that the */
/*  stacked matrix ( A ; B ) has rank N, which makes the solution unique. */
/*  The solution is obtained from a generalized RQ factorization */
/*  B = (0 R)*Q, A = Z*T*Q of the pair (B, A). */

/*  Arguments */
/*  ========= */

/*  M, N, P (input)   rows of A, columns of A and B, rows of B; */
/*                    M >= 0, N >= 0, 0 <= P <= N <= M+P. */
/*  A, LDA  (in/out)  M-by-N matrix A, LDA >= max(1,M).  On exit the part */
/*                    on and above the diagonal holds the upper */
/*                    trapezoidal factor T. */
/*  B, LDB  (in/out)  P-by-N matrix B, LDB >= max(1,P).  On exit the upper */
/*                    triangle of B(1:P,N-P+1:N) holds the factor R. */
/*  C       (in/out)  dimension (M).  Right-hand side of the least squares */
/*                    part; on exit the residual sum of squares is the sum */
/*                    of squares of elements N-P+1 to M. */
/*  D       (in/out)  dimension (P).  Right-hand side of the constraint; */
/*                    destroyed on exit. */
/*  X       (output)  dimension (N).  Solution of the LSE problem. */
/*  WORK    (workspace/output)  dimension (MAX(1,LWORK)).  The first */
/*                    element receives the optimal LWORK. */
/*  LWORK   (input)   LWORK >= max(1,M+N+P); for best performance */
/*                    LWORK >= P+min(M,N)+max(M,N)*NB, where NB is the */
/*                    largest block size reported for DGEQRF, DGERQF, */
/*                    DORMQR and DORMRQ.  LWORK = -1 requests a workspace */
/*                    query: only the optimal size is computed and stored */
/*                    in the first element of WORK. */
/*  INFO    (output)  = 0: success. */
/*                    < 0: INFO = -i means argument i had an illegal value. */
/*                    = 1: the triangular factor R associated with B is */
/*                         singular (rank(B) < P). */
/*                    = 2: the (N-P)-by-(N-P) part of the factor T */
/*                         associated with A is singular. */

/*  ===================================================================== */

    /* Sizes and leading dimensions, read once on entry. */
    /* NOTE(review): the cached values are reused after the external calls */
    /* below, which assumes those routines leave *m, *n, *p, *lda and *ldb */
    /* untouched -- confirm. */
    const integer rows = *m;
    const integer cols = *n;
    const integer cons = *p;
    const integer a_ld = *lda;
    const integer b_ld = *ldb;
    const logical lquery = *lwork == -1;

    integer mn = min(rows,cols);
    integer lopt, used;
    integer nfree, nfree_ld, ldc, nr, ntail;
    integer scratch_len;
    doublereal *a_tau, *scratch;

    /* Argument checks; the first violated condition decides INFO. */
    *info = (rows < 0) ? -1
	  : (cols < 0) ? -2
	  : (cons < 0 || cons > cols || cons < cols - rows) ? -3
	  : (a_ld < max(1,rows)) ? -5
	  : (b_ld < max(1,cons)) ? -7
	  : 0;

    /* Workspace sizes are only computed for valid dimensions. */
    if (*info == 0) {
	integer lwkmin, lwkopt;

	if (cols == 0) {
	    lwkmin = 1;
	    lwkopt = 1;
	} else {
	    /* nb ends up as the largest of the four block sizes. */
	    integer nb = ilaenv_(&c__1, "DGEQRF", " ", m, n, &c_n1, &c_n1);
	    integer cand = ilaenv_(&c__1, "DGERQF", " ", m, n, &c_n1, &c_n1);

	    if (cand > nb) {
		nb = cand;
	    }
	    cand = ilaenv_(&c__1, "DORMQR", " ", m, n, p, &c_n1);
	    if (cand > nb) {
		nb = cand;
	    }
	    cand = ilaenv_(&c__1, "DORMRQ", " ", m, n, p, &c_n1);
	    if (cand > nb) {
		nb = cand;
	    }
	    lwkmin = rows + cols + cons;
	    lwkopt = cons + mn + max(rows,cols) * nb;
	}
	work[0] = (doublereal) lwkopt;

	if (! lquery && *lwork < lwkmin) {
	    *info = -12;
	}
    }

    if (*info != 0) {
	integer neg_info = -(*info);

	xerbla_("DGGLSE", &neg_info);
	return 0;
    }

    /* Workspace query, or nothing to solve. */
    if (lquery || cols == 0) {
	return 0;
    }

    /* WORK is split three ways: the leading part is handed over together */
    /* with B (dggrqf_, dormrq_), the part starting at offset P together */
    /* with A (dggrqf_, dormqr_), and everything from offset P+min(M,N) on */
    /* is scratch space for the called routines. */
    a_tau = work + cons;
    scratch = work + cons + mn;
    nfree = cols - cons;

    /* Compute the GRQ factorization of matrices B and A: */
    /*    B*Q' = (  0  T12 ) P   Z'*A*Q' = ( R11 R12 ) N-P */
    /*             N-P  P                  (  0  R22 ) M+P-N */
    /*                                       N-P  P */
    /* where T12 and R11 are upper triangular, and Q and Z are orthogonal. */
    scratch_len = *lwork - cons - mn;
    dggrqf_(p, m, n, b, ldb, work, a, lda, a_tau, scratch, &scratch_len,
	    info);
    lopt = (integer) scratch[0];

    /* Update c = Z'*c = ( c1 ) N-P */
    /*                   ( c2 ) M+P-N */
    ldc = max(1,rows);
    scratch_len = *lwork - cons - mn;
    dormqr_("Left", "Transpose", m, &c__1, &mn, a, lda, a_tau, c__, &ldc,
	    scratch, &scratch_len, info);
    used = (integer) scratch[0];
    if (used > lopt) {
	lopt = used;
    }

    /* Solve T12*x2 = d for x2 */
    if (cons > 0) {
	dtrtrs_("Upper", "No transpose", "Non-unit", p, &c__1,
		b + nfree * b_ld, ldb, d__, p, info);
	if (*info > 0) {
	    *info = 1;
	    return 0;
	}

	/* Put the solution in X */
	dcopy_(p, d__, &c__1, x + nfree, &c__1);

	/* Update c1 */
	dgemv_("No transpose", &nfree, p, &c_b31, a + nfree * a_ld, lda,
		d__, &c__1, &c_b33, c__, &c__1);
    }

    /* Solve R11*x1 = c1 for x1 */
    if (cols > cons) {
	nfree_ld = nfree;
	dtrtrs_("Upper", "No transpose", "Non-unit", &nfree, &c__1, a, lda,
		c__, &nfree_ld, info);
	if (*info > 0) {
	    *info = 2;
	    return 0;
	}

	/* Put the solutions in X */
	dcopy_(&nfree, c__, &c__1, x, &c__1);
    }

    /* Compute the residual vector: */
    if (rows < cols) {
	nr = rows + cons - cols;
	if (nr > 0) {
	    ntail = cols - rows;
	    dgemv_("No transpose", &nr, &ntail, &c_b31,
		    a + nfree + rows * a_ld, lda, d__ + nr, &c__1, &c_b33,
		    c__ + nfree, &c__1);
	}
    } else {
	nr = cons;
    }
    if (nr > 0) {
	dtrmv_("Upper", "No transpose", "Non unit", &nr,
		a + nfree + nfree * a_ld, lda, d__, &c__1);
	daxpy_(&nr, &c_b31, d__, &c__1, c__ + nfree, &c__1);
    }

    /* Backward transformation x = Q'*x */
    scratch_len = *lwork - cons - mn;
    dormrq_("Left", "Transpose", n, &c__1, p, b, ldb, work, x, n, scratch,
	    &scratch_len, info);

    /* Report the largest workspace size seen, including the two leading */
    /* parts of WORK. */
    used = (integer) scratch[0];
    work[0] = (doublereal) (cons + mn + max(lopt,used));

    return 0;

/*     End of DGGLSE */

} /* dgglse_ */