/*! _dgbmatrix*_dcovector operator
 *
 *  Multiplies the temporary band matrix `mat` by the temporary column
 *  vector `vec` through dgbmv_ (option 'N', alpha 1.0, beta 0.0, unit
 *  strides, leading dimension KL+KU+1) and returns the product wrapped
 *  by _().  Both operands are destroy()ed before returning.
 *
 *  CPPL_VERBOSE : a trace line is written to std::cerr on entry.
 *  CPPL_DEBUG   : when mat.N differs from vec.L an error is printed and
 *                 the program terminates through exit(1).
 */
inline _dcovector operator*(const _dgbmatrix& mat, const _dcovector& vec)
{
#ifdef  CPPL_VERBOSE
  std::cerr << "# [MARK] operator*(const _dgbmatrix&, const _dcovector&)"
            << std::endl;
#endif//CPPL_VERBOSE

#ifdef  CPPL_DEBUG
  if( !(mat.N==vec.L) ){
    std::cerr << "[ERROR] operator*(const _dgbmatrix&, const _dcovector&)" << std::endl;
    std::cerr << "These matrix and vector can not make a product." << std::endl;
    std::cerr << "Your input was (" << mat.M << "x" << mat.N << ") * (" << vec.L << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG

  // The result has one entry per matrix row.
  dcovector product(mat.M);
  dgbmv_( 'N', mat.M, mat.N, mat.KL, mat.KU,
          1.0, mat.Array, mat.KL+mat.KU+1,
          vec.Array, 1,
          0.0, product.array, 1 );

  // Release the storage held by the two temporaries.
  mat.destroy();
  vec.destroy();
  return _(product);
}
/*
 * f2c_dgbmv: hands all thirteen arguments, unchanged and in the same
 * order, to dgbmv_.  Whatever dgbmv_ returns is discarded; this wrapper
 * always reports 0.
 */
int f2c_dgbmv(char *trans,
              integer *M, integer *N, integer *KL, integer *KU,
              doublereal *alpha,
              doublereal *A, integer *lda,
              doublereal *X, integer *incX,
              doublereal *beta,
              doublereal *Y, integer *incY)
{
    dgbmv_(trans,
           M, N, KL, KU,
           alpha,
           A, lda,
           X, incX,
           beta,
           Y, incY);

    return 0;
}
/*! drovector*dgbmatrix operator
 *
 *  Computes the row vector vec*mat by calling dgbmv_ with option 'T'
 *  (alpha 1.0, beta 0.0, unit strides, leading dimension kl+ku+1) and
 *  returns the mat.n-element result wrapped by _().
 *
 *  CPPL_DEBUG : when vec.l differs from mat.m an error is printed and
 *               the program terminates through exit(1).
 */
inline _drovector operator*(const drovector& vec, const dgbmatrix& mat)
{VERBOSE_REPORT;
#ifdef  CPPL_DEBUG
  if( !(vec.l==mat.m) ){
    ERROR_REPORT;
    std::cerr << "These vector and matrix can not make a product." << std::endl;
    std::cerr << "Your input was (" << vec.l << ") * (" << mat.m << "x" << mat.n << ")." << std::endl;
    exit(1);
  }
#endif//CPPL_DEBUG

  // The result has one entry per matrix column.
  drovector product(mat.n);
  dgbmv_( 'T', mat.m, mat.n, mat.kl, mat.ku,
          1.0, mat.array, mat.kl+mat.ku+1,
          vec.array, 1,
          0.0, product.array, 1 );

  return _(product);
}
/* dla_gbrfsx_extended__: f2c translation of LAPACK DLA_GBRFSX_EXTENDED. For each of the *nrhs columns of Y it repeats (at most *ithresh times) the cycle "residual -> dgbtrs_ solve for the correction DY -> convergence bookkeeping -> update Y", then stores the second field of the normwise/componentwise error bounds and the backward error BERR_OUT(j). Returns 0 immediately when *info is nonzero on entry. */ /* Subroutine */ int dla_gbrfsx_extended__(integer *prec_type__, integer * trans_type__, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, logical *colequ, doublereal *c__, doublereal *b, integer *ldb, doublereal *y, integer *ldy, doublereal *berr_out__, integer *n_norms__, doublereal *err_bnds_norm__, doublereal * err_bnds_comp__, doublereal *res, doublereal *ayb, doublereal *dy, doublereal *y_tail__, doublereal *rcond, integer *ithresh, doublereal *rthresh, doublereal *dz_ub__, logical *ignore_cwise__, integer *info) { /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, y_dim1, y_offset, err_bnds_norm_dim1, err_bnds_norm_offset, err_bnds_comp_dim1, err_bnds_comp_offset, i__1, i__2, i__3; doublereal d__1, d__2; char ch__1[1]; /* Local variables */ doublereal dxratmax, dzratmax; integer i__, j, m; extern /* Subroutine */ int dla_gbamv__(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); logical incr_prec__; doublereal prev_dz_z__, yk, final_dx_x__; extern /* Subroutine */ int dla_wwaddw__(integer *, doublereal *, doublereal *, doublereal *); doublereal final_dz_z__, prevnormdx; integer cnt; doublereal dyk, eps, incr_thresh__, dx_x__, dz_z__; extern /* Subroutine */ int dla_lin_berr__(integer *, integer *, integer * , doublereal *, doublereal *, doublereal *); doublereal ymin; extern /* Subroutine */ int blas_dgbmv_x__(integer *, integer *, integer * , integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *); integer y_prec_state__; extern /* Subroutine */ int blas_dgbmv2_x__(integer *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *, integer *), 
dgbmv_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dcopy_(integer *, doublereal *, integer *, doublereal *, integer *); doublereal dxrat, dzrat; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); char trans[1]; doublereal normx, normy; extern doublereal dlamch_(char *); extern /* Subroutine */ int dgbtrs_(char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); doublereal normdx; extern /* Character */ VOID chla_transtype__(char *, ftnlen, integer *); doublereal hugeval; integer x_state__, z_state__; /* -- LAPACK routine (version 3.2.1) -- */ /* -- Contributed by James Demmel, Deaglan Halligan, Yozo Hida and -- */ /* -- Jason Riedy of Univ. of California Berkeley. -- */ /* -- April 2009 -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley and NAG Ltd. -- */ /* .. */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLA_GBRFSX_EXTENDED improves the computed solution to a system of */ /* linear equations by performing extra-precise iterative refinement */ /* and provides error bounds and backward error estimates for the solution. */ /* This subroutine is called by DGBRFSX to perform iterative refinement. */ /* In addition to normwise error bound, the code provides maximum */ /* componentwise error bound if possible. See comments for ERR_BNDS_NORM */ /* and ERR_BNDS_COMP for details of the error bounds. Note that this */ /* subroutine is only responsible for setting the second fields of */ /* ERR_BNDS_NORM and ERR_BNDS_COMP. */ /* Arguments */ /* ========= */ /* PREC_TYPE (input) INTEGER */ /* Specifies the intermediate precision to be used in refinement. 
*/ /* The value is defined by ILAPREC(P) where P is a CHARACTER and */ /* P = 'S': Single */ /* = 'D': Double */ /* = 'I': Indigenous */ /* = 'X', 'E': Extra */ /* TRANS_TYPE (input) INTEGER */ /* Specifies the transposition operation on A. */ /* The value is defined by ILATRANS(T) where T is a CHARACTER and */ /* T = 'N': No transpose */ /* = 'T': Transpose */ /* = 'C': Conjugate transpose */ /* N (input) INTEGER */ /* The number of linear equations, i.e., the order of the */ /* matrix A. N >= 0. */ /* KL (input) INTEGER */ /* The number of subdiagonals within the band of A. KL >= 0. */ /* KU (input) INTEGER */ /* The number of superdiagonals within the band of A. KU >= 0 */ /* NRHS (input) INTEGER */ /* The number of right-hand-sides, i.e., the number of columns of the */ /* matrix B. */ /* AB (input) DOUBLE PRECISION array, dimension (LDAB,N) */ /* On entry, the N-by-N matrix A. */ /* LDAB (input) INTEGER */ /* The leading dimension of the array AB. LDAB >= max(1,N). */ /* NOTE(review): this bound is carried over from the original header; */ /* confirm it against the band-storage layout expected by dgbmv_. */ /* AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) */ /* The factors L and U from the factorization */ /* A = P*L*U as computed by DGBTRF. */ /* LDAFB (input) INTEGER */ /* The leading dimension of the array AFB. LDAFB >= max(1,N). */ /* IPIV (input) INTEGER array, dimension (N) */ /* The pivot indices from the factorization A = P*L*U */ /* as computed by DGBTRF; row i of the matrix was interchanged */ /* with row IPIV(i). */ /* COLEQU (input) LOGICAL */ /* If .TRUE. then column equilibration was done to A before calling */ /* this routine. This is needed to compute the solution and error */ /* bounds correctly. */ /* C (input) DOUBLE PRECISION array, dimension (N) */ /* The column scale factors for A. If COLEQU = .FALSE., C */ /* is not accessed. If C is input, each element of C should be a power */ /* of the radix to ensure a reliable solution and error estimates. */ /* Scaling by powers of the radix does not cause rounding errors unless */ /* the result underflows or overflows. 
Rounding errors during scaling */ /* lead to refining with a matrix that is not equivalent to the */ /* input matrix, producing error estimates that may not be */ /* reliable. */ /* B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* The right-hand-side matrix B. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. LDB >= max(1,N). */ /* Y (input/output) DOUBLE PRECISION array, dimension */ /* (LDY,NRHS) */ /* On entry, the solution matrix X, as computed by DGBTRS. */ /* On exit, the improved solution matrix Y. */ /* LDY (input) INTEGER */ /* The leading dimension of the array Y. LDY >= max(1,N). */ /* BERR_OUT (output) DOUBLE PRECISION array, dimension (NRHS) */ /* On exit, BERR_OUT(j) contains the componentwise relative backward */ /* error for right-hand-side j from the formula */ /* max(i) ( abs(RES(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. This is computed by DLA_LIN_BERR. */ /* N_NORMS (input) INTEGER */ /* Determines which error bounds to return (see ERR_BNDS_NORM */ /* and ERR_BNDS_COMP). */ /* If N_NORMS >= 1 return normwise error bounds. */ /* If N_NORMS >= 2 return componentwise error bounds. */ /* ERR_BNDS_NORM (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* normwise relative error, which is defined as follows: */ /* Normwise relative error in the ith solution vector: */ /* max_j (abs(XTRUE(j,i) - X(j,i))) */ /* ------------------------------ */ /* max_j abs(X(j,i)) */ /* The array is indexed by the type of error information as described */ /* below. There currently are up to three pieces of information */ /* returned. */ /* The first index in ERR_BNDS_NORM(i,:) corresponds to the ith */ /* right-hand side. 
*/ /* The second index in ERR_BNDS_NORM(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated normwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*A, where S scales each row by a power of the */ /* radix so all absolute row sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* ERR_BNDS_COMP (input/output) DOUBLE PRECISION array, dimension */ /* (NRHS, N_ERR_BNDS) */ /* For each right-hand side, this array contains information about */ /* various error bounds and condition numbers corresponding to the */ /* componentwise relative error, which is defined as follows: */ /* Componentwise relative error in the ith solution vector: */ /* abs(XTRUE(j,i) - X(j,i)) */ /* max_j ---------------------- */ /* abs(X(j,i)) */ /* The array is indexed by the right-hand side i (on which the */ /* componentwise relative error depends), and the type of error */ /* information as described below. There currently are up to three */ /* pieces of information returned for each right-hand side. 
If */ /* componentwise accuracy is not requested (PARAMS(3) = 0.0), then */ /* ERR_BNDS_COMP is not accessed. If N_ERR_BNDS .LT. 3, then at most */ /* the first (:,N_ERR_BNDS) entries are returned. */ /* The first index in ERR_BNDS_COMP(i,:) corresponds to the ith */ /* right-hand side. */ /* The second index in ERR_BNDS_COMP(:,err) contains the following */ /* three fields: */ /* err = 1 "Trust/don't trust" boolean. Trust the answer if the */ /* reciprocal condition number is less than the threshold */ /* sqrt(n) * slamch('Epsilon'). */ /* err = 2 "Guaranteed" error bound: The estimated forward error, */ /* almost certainly within a factor of 10 of the true error */ /* so long as the next entry is greater than the threshold */ /* sqrt(n) * slamch('Epsilon'). This error bound should only */ /* be trusted if the previous boolean is true. */ /* err = 3 Reciprocal condition number: Estimated componentwise */ /* reciprocal condition number. Compared with the threshold */ /* sqrt(n) * slamch('Epsilon') to determine if the error */ /* estimate is "guaranteed". These reciprocal condition */ /* numbers are 1 / (norm(Z^{-1},inf) * norm(Z,inf)) for some */ /* appropriately scaled matrix Z. */ /* Let Z = S*(A*diag(x)), where x is the solution for the */ /* current right-hand side and S scales each row of */ /* A*diag(x) by a power of the radix so all absolute row */ /* sums of Z are approximately 1. */ /* This subroutine is only responsible for setting the second field */ /* above. */ /* See Lapack Working Note 165 for further details and extra */ /* cautions. */ /* RES (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate residual. */ /* AYB (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace. This can be the same workspace passed for Y_TAIL. */ /* DY (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the intermediate solution. 
*/ /* Y_TAIL (input) DOUBLE PRECISION array, dimension (N) */ /* Workspace to hold the trailing bits of the intermediate solution. */ /* RCOND (input) DOUBLE PRECISION */ /* Reciprocal scaled condition number. This is an estimate of the */ /* reciprocal Skeel condition number of the matrix A after */ /* equilibration (if done). If this is less than the machine */ /* precision (in particular, if it is zero), the matrix is singular */ /* to working precision. Note that the error may still be small even */ /* if this number is very small and the matrix appears ill- */ /* conditioned. */ /* ITHRESH (input) INTEGER */ /* The maximum number of residual computations allowed for */ /* refinement. The default is 10. For 'aggressive' set to 100 to */ /* permit convergence using approximate factorizations or */ /* factorizations other than LU. If the factorization uses a */ /* technique other than Gaussian elimination, the guarantees in */ /* ERR_BNDS_NORM and ERR_BNDS_COMP may no longer be trustworthy. */ /* RTHRESH (input) DOUBLE PRECISION */ /* Determines when to stop refinement if the error estimate stops */ /* decreasing. Refinement will stop when the next solution no longer */ /* satisfies norm(dx_{i+1}) < RTHRESH * norm(dx_i) where norm(Z) is */ /* the infinity norm of Z. RTHRESH satisfies 0 < RTHRESH <= 1. The */ /* default value is 0.5. For 'aggressive' set to 0.9 to permit */ /* convergence on extremely ill-conditioned matrices. See LAWN 165 */ /* for more details. */ /* DZ_UB (input) DOUBLE PRECISION */ /* Determines when to start considering componentwise convergence. */ /* Componentwise convergence is only considered after each component */ /* of the solution Y is stable, which we define as the relative */ /* change in each component being less than DZ_UB. The default value */ /* is 0.25, requiring the first bit to be stable. See LAWN 165 for */ /* more details. */ /* IGNORE_CWISE (input) LOGICAL */ /* If .TRUE. then ignore componentwise convergence. 
Default value */ /* is .FALSE.. */ /* INFO (output) INTEGER */ /* = 0: Successful exit. */ /* < 0: if INFO = -i, the ith argument to DGBTRS had an illegal */ /* value */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. Parameters .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ /* Each array pointer is shifted so the Fortran-style 1-based subscripts used below address the caller's storage. */ err_bnds_comp_dim1 = *nrhs; err_bnds_comp_offset = 1 + err_bnds_comp_dim1; err_bnds_comp__ -= err_bnds_comp_offset; err_bnds_norm_dim1 = *nrhs; err_bnds_norm_offset = 1 + err_bnds_norm_dim1; err_bnds_norm__ -= err_bnds_norm_offset; ab_dim1 = *ldab; ab_offset = 1 + ab_dim1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1; afb -= afb_offset; --ipiv; --c__; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; y_dim1 = *ldy; y_offset = 1 + y_dim1; y -= y_offset; --berr_out__; --res; --ayb; --dy; --y_tail__; /* Function Body */ if (*info != 0) { return 0; } chla_transtype__(ch__1, (ftnlen)1, trans_type__); *(unsigned char *)trans = *(unsigned char *)&ch__1[0]; eps = dlamch_("Epsilon"); hugeval = dlamch_("Overflow"); /* Force HUGEVAL to Inf */ hugeval *= hugeval; /* Using HUGEVAL may lead to spurious underflows. */ incr_thresh__ = (doublereal) (*n) * eps; m = *kl + *ku + 1; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { /* y_prec_state__ selects the residual routine used below: 0 -> dgbmv_, 1 -> blas_dgbmv_x__, anything else -> blas_dgbmv2_x__ (which also takes y_tail__). It starts at 1 for every right-hand side and is only ever incremented. */ y_prec_state__ = 1; if (y_prec_state__ == 2) { i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { y_tail__[i__] = 0.; } } dxrat = 0.; dxratmax = 0.; dzrat = 0.; dzratmax = 0.; final_dx_x__ = hugeval; final_dz_z__ = hugeval; prevnormdx = hugeval; prev_dz_z__ = hugeval; dz_z__ = hugeval; dx_x__ = hugeval; /* x_state__ / z_state__ codes as used below: 1 = still iterating, 2 = relative change <= eps, 3 = ratio exceeded *rthresh while y_prec_state__ == 2; z_state__ 0 = dz_z__ not yet (or no longer) <= *dz_ub__. */ x_state__ = 1; z_state__ = 0; incr_prec__ = FALSE_; i__2 = *ithresh; for (cnt = 1; cnt <= i__2; ++cnt) { /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). 
*/ dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); if (y_prec_state__ == 0) { /* NOTE(review): this call passes &m (= *kl + *ku + 1) as the first dimension whereas the other two branches and the final residual pass n; the branch looks unreachable because y_prec_state__ starts at 1 and only grows - confirm before relying on it. */ dgbmv_(trans, &m, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[ j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); } else if (y_prec_state__ == 1) { blas_dgbmv_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, & res[1], &c__1, prec_type__); } else { blas_dgbmv2_x__(trans_type__, n, n, kl, ku, &c_b6, &ab[ ab_offset], ldab, &y[j * y_dim1 + 1], &y_tail__[1], & c__1, &c_b8, &res[1], &c__1, prec_type__); } /* XXX: RES is no longer needed. */ /* Solve for the correction: DY starts as a copy of RES and is overwritten by dgbtrs_ using the factors in AFB/IPIV. */ dcopy_(n, &res[1], &c__1, &dy[1], &c__1); dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] , &dy[1], n, info); /* Calculate relative changes DX_X, DZ_Z and ratios DXRAT, DZRAT. */ normx = 0.; normy = 0.; normdx = 0.; dz_z__ = 0.; ymin = hugeval; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { yk = (d__1 = y[i__ + j * y_dim1], abs(d__1)); dyk = (d__1 = dy[i__], abs(d__1)); if (yk != 0.) { /* Computing MAX */ d__1 = dz_z__, d__2 = dyk / yk; dz_z__ = max(d__1,d__2); } else if (dyk != 0.) { dz_z__ = hugeval; } ymin = min(ymin,yk); normy = max(normy,yk); if (*colequ) { /* Computing MAX */ d__1 = normx, d__2 = yk * c__[i__]; normx = max(d__1,d__2); /* Computing MAX */ d__1 = normdx, d__2 = dyk * c__[i__]; normdx = max(d__1,d__2); } else { normx = normy; normdx = max(normdx,dyk); } } if (normx != 0.) { dx_x__ = normdx / normx; } else if (normdx == 0.) { dx_x__ = 0.; } else { dx_x__ = hugeval; } dxrat = normdx / prevnormdx; dzrat = dz_z__ / prev_dz_z__; /* Check termination criteria. */ if (! 
(*ignore_cwise__) && ymin * *rcond < incr_thresh__ * normy && y_prec_state__ < 2) { incr_prec__ = TRUE_; } if (x_state__ == 3 && dxrat <= *rthresh) { x_state__ = 1; } if (x_state__ == 1) { if (dx_x__ <= eps) { x_state__ = 2; } else if (dxrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { x_state__ = 3; } } else { if (dxrat > dxratmax) { dxratmax = dxrat; } } if (x_state__ > 1) { final_dx_x__ = dx_x__; } } if (z_state__ == 0 && dz_z__ <= *dz_ub__) { z_state__ = 1; } if (z_state__ == 3 && dzrat <= *rthresh) { z_state__ = 1; } if (z_state__ == 1) { if (dz_z__ <= eps) { z_state__ = 2; } else if (dz_z__ > *dz_ub__) { z_state__ = 0; dzratmax = 0.; final_dz_z__ = hugeval; } else if (dzrat > *rthresh) { if (y_prec_state__ != 2) { incr_prec__ = TRUE_; } else { z_state__ = 3; } } else { if (dzrat > dzratmax) { dzratmax = dzrat; } } if (z_state__ > 1) { final_dz_z__ = dz_z__; } } /* Exit if both normwise and componentwise stopped working, */ /* but if componentwise is unstable, let it go at least two */ /* iterations. */ if (x_state__ != 1) { if (*ignore_cwise__) { goto L666; } if (z_state__ == 3 || z_state__ == 2) { goto L666; } if (z_state__ == 0 && cnt > 1) { goto L666; } } if (incr_prec__) { incr_prec__ = FALSE_; ++y_prec_state__; i__3 = *n; for (i__ = 1; i__ <= i__3; ++i__) { y_tail__[i__] = 0.; } } prevnormdx = normdx; prev_dz_z__ = dz_z__; /* Update solution. */ if (y_prec_state__ < 2) { daxpy_(n, &c_b8, &dy[1], &c__1, &y[j * y_dim1 + 1], &c__1); } else { dla_wwaddw__(n, &y[j * y_dim1 + 1], &y_tail__[1], &dy[1]); } } /* Target of "IF (Z_STOP .AND. X_STOP)". Sun's f77 won't EXIT. */ L666: /* Set final_* when cnt hits ithresh. */ if (x_state__ == 1) { final_dx_x__ = dx_x__; } if (z_state__ == 1) { final_dz_z__ = dz_z__; } /* Compute error bounds. 
*/ if (*n_norms__ >= 1) { err_bnds_norm__[j + (err_bnds_norm_dim1 << 1)] = final_dx_x__ / ( 1 - dxratmax); } if (*n_norms__ >= 2) { err_bnds_comp__[j + (err_bnds_comp_dim1 << 1)] = final_dz_z__ / ( 1 - dzratmax); } /* Compute componentwise relative backward error from formula */ /* max(i) ( abs(R(i)) / ( abs(op(A_s))*abs(Y) + abs(B_s) )(i) ) */ /* where abs(Z) is the componentwise absolute value of the matrix */ /* or vector Z. */ /* Compute residual RES = B_s - op(A_s) * Y, */ /* op(A) = A, A**T, or A**H depending on TRANS (and type). */ dcopy_(n, &b[j * b_dim1 + 1], &c__1, &res[1], &c__1); dgbmv_(trans, n, n, kl, ku, &c_b6, &ab[ab_offset], ldab, &y[j * y_dim1 + 1], &c__1, &c_b8, &res[1], &c__1); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { ayb[i__] = (d__1 = b[i__ + j * b_dim1], abs(d__1)); } /* Compute abs(op(A_s))*abs(Y) + abs(B_s). */ dla_gbamv__(trans_type__, n, n, kl, ku, &c_b8, &ab[ab_offset], ldab, & y[j * y_dim1 + 1], &c__1, &c_b8, &ayb[1], &c__1); dla_lin_berr__(n, n, &c__1, &res[1], &ayb[1], &berr_out__[j]); /* End of loop for each RHS */ } return 0; } /* dla_gbrfsx_extended__ */
/*
 * dgbmv: by-value convenience wrapper around dgbmv_.
 *
 * Every scalar argument is received by value and forwarded by address;
 * the three array pointers (a, x, y) are forwarded as they are.
 * Nothing is returned.
 *
 * NOTE(review): `inca` occupies the argument slot that the other dgbmv_
 * callers in this file name `lda` (leading dimension of the band array)
 * - confirm against the dgbmv_ prototype.
 */
void dgbmv(char trans,
           int m, int n, int kl, int ku,
           double alpha,
           double *a, int inca,
           double *x, int incx,
           double beta,
           double *y, int incy )
{
    dgbmv_( &trans,
            &m, &n, &kl, &ku,
            &alpha,
            a, &inca,
            x, &incx,
            &beta,
            y, &incy );
}
/* Subroutine */ int dlarhs_(char *path, char *xtype, char *uplo, char *trans, integer *m, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal * b, integer *ldb, integer *iseed, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); /* Local variables */ static logical band; static char diag[1]; static logical tran; static integer j; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dgbmv_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static char c1[1], c2[2]; extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtpmv_( char *, char *, char *, integer *, doublereal *, doublereal *, integer *); static integer mb, nx; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); extern logical lsamen_(integer *, char *, char *); extern /* Subroutine */ int 
dlarnv_(integer *, integer *, integer *, doublereal *); static logical notran, gen, tri, qrs, sym; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLARHS chooses a set of NRHS random solution vectors and sets up the right hand sides for the linear system op( A ) * X = B, where op( A ) may be A or A' (transpose of A). Arguments ========= PATH (input) CHARACTER*3 The type of the real matrix A. PATH may be given in any combination of upper and lower case. Valid types include xGE: General m x n matrix xGB: General banded matrix xPO: Symmetric positive definite, 2-D storage xPP: Symmetric positive definite packed xPB: Symmetric positive definite banded xSY: Symmetric indefinite, 2-D storage xSP: Symmetric indefinite packed xSB: Symmetric indefinite banded xTR: Triangular xTP: Triangular packed xTB: Triangular banded xQR: General m x n matrix xLQ: General m x n matrix xQL: General m x n matrix xRQ: General m x n matrix where the leading character indicates the precision. XTYPE (input) CHARACTER*1 Specifies how the exact solution X will be determined: = 'N': New solution; generate a random X. = 'C': Computed; use value of X on entry. UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the matrix A is stored, if A is symmetric. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the operation applied to the matrix A. = 'N': System is A * x = b = 'T': System is A'* x = b = 'C': System is A'* x = b M (input) INTEGER The number or rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. 
KL (input) INTEGER Used only if A is a band matrix; specifies the number of subdiagonals of A if A is a general band matrix or if A is symmetric or triangular and UPLO = 'L'; specifies the number of superdiagonals of A if A is symmetric or triangular and UPLO = 'U'. 0 <= KL <= M-1. KU (input) INTEGER Used only if A is a general band matrix or if A is triangular. If PATH = xGB, specifies the number of superdiagonals of A, and 0 <= KU <= N-1. If PATH = xTR, xTP, or xTB, specifies whether or not the matrix has unit diagonal: = 1: matrix has non-unit diagonal (default) = 2: matrix has unit diagonal NRHS (input) INTEGER The number of right hand side vectors in the system A*X = B. A (input) DOUBLE PRECISION array, dimension (LDA,N) The test matrix whose type is given by PATH. LDA (input) INTEGER The leading dimension of the array A. If PATH = xGB, LDA >= KL+KU+1. If PATH = xPB, xSB, xHB, or xTB, LDA >= KL+1. Otherwise, LDA >= max(1,M). X (input or output) DOUBLE PRECISION array, dimension(LDX,NRHS) On entry, if XTYPE = 'C' (for 'Computed'), then X contains the exact solution to the system of linear equations. On exit, if XTYPE = 'N' (for 'New'), then X is initialized with random values. LDX (input) INTEGER The leading dimension of the array X. If TRANS = 'N', LDX >= max(1,N); if TRANS = 'T', LDX >= max(1,M). B (output) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side vector(s) for the system of equations, computed from B = op(A) * X, where op(A) is determined by TRANS. LDB (input) INTEGER The leading dimension of the array B. If TRANS = 'N', LDB >= max(1,M); if TRANS = 'T', LDB >= max(1,N). ISEED (input/output) INTEGER array, dimension (4) The seed vector for the random number generator (used in DLATMS). Modified on exit. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. 
Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --iseed; /* Function Body */ *info = 0; *(unsigned char *)c1 = *(unsigned char *)path; s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2); tran = lsame_(trans, "T") || lsame_(trans, "C"); notran = ! tran; gen = lsame_(path + 1, "G"); qrs = lsame_(path + 1, "Q") || lsame_(path + 2, "Q"); sym = lsame_(path + 1, "P") || lsame_(path + 1, "S"); tri = lsame_(path + 1, "T"); band = lsame_(path + 2, "B"); if (! lsame_(c1, "Double precision")) { *info = -1; } else if (! (lsame_(xtype, "N") || lsame_(xtype, "C"))) { *info = -2; } else if ((sym || tri) && ! (lsame_(uplo, "U") || lsame_(uplo, "L"))) { *info = -3; } else if ((gen || qrs) && ! (tran || lsame_(trans, "N"))) { *info = -4; } else if (*m < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (band && *kl < 0) { *info = -7; } else if (band && *ku < 0) { *info = -8; } else if (*nrhs < 0) { *info = -9; } else if (! band && *lda < max(1,*m) || band && (sym || tri) && *lda < * kl + 1 || band && gen && *lda < *kl + *ku + 1) { *info = -11; } else if (notran && *ldx < max(1,*n) || tran && *ldx < max(1,*m)) { *info = -13; } else if (notran && *ldb < max(1,*m) || tran && *ldb < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("DLARHS", &i__1); return 0; } /* Initialize X to NRHS random vectors unless XTYPE = 'C'. */ if (tran) { nx = *m; mb = *n; } else { nx = *n; mb = *m; } if (! lsame_(xtype, "C")) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dlarnv_(&c__2, &iseed[1], n, &x_ref(1, j)); /* L10: */ } } /* Multiply X by op( A ) using an appropriate matrix multiply routine. 
*/ if (lsamen_(&c__2, c2, "GE") || lsamen_(&c__2, c2, "QR") || lsamen_(&c__2, c2, "LQ") || lsamen_(&c__2, c2, "QL") || lsamen_(&c__2, c2, "RQ")) { /* General matrix */ dgemm_(trans, "N", &mb, nrhs, &nx, &c_b32, &a[a_offset], lda, &x[ x_offset], ldx, &c_b33, &b[b_offset], ldb); } else if (lsamen_(&c__2, c2, "PO") || lsamen_(& c__2, c2, "SY")) { /* Symmetric matrix, 2-D storage */ dsymm_("Left", uplo, n, nrhs, &c_b32, &a[a_offset], lda, &x[x_offset], ldx, &c_b33, &b[b_offset], ldb); } else if (lsamen_(&c__2, c2, "GB")) { /* General matrix, band storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dgbmv_(trans, &mb, &nx, kl, ku, &c_b32, &a[a_offset], lda, &x_ref( 1, j), &c__1, &c_b33, &b_ref(1, j), &c__1); /* L20: */ } } else if (lsamen_(&c__2, c2, "PB")) { /* Symmetric matrix, band storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dsbmv_(uplo, n, kl, &c_b32, &a[a_offset], lda, &x_ref(1, j), & c__1, &c_b33, &b_ref(1, j), &c__1); /* L30: */ } } else if (lsamen_(&c__2, c2, "PP") || lsamen_(& c__2, c2, "SP")) { /* Symmetric matrix, packed storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dspmv_(uplo, n, &c_b32, &a[a_offset], &x_ref(1, j), &c__1, &c_b33, &b_ref(1, j), &c__1); /* L40: */ } } else if (lsamen_(&c__2, c2, "TR")) { /* Triangular matrix. 
Note that for triangular matrices, KU = 1 => non-unit triangular KU = 2 => unit triangular */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } dtrmm_("Left", uplo, trans, diag, n, nrhs, &c_b32, &a[a_offset], lda, &b[b_offset], ldb) ; } else if (lsamen_(&c__2, c2, "TP")) { /* Triangular matrix, packed storage */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dtpmv_(uplo, trans, diag, n, &a[a_offset], &b_ref(1, j), &c__1); /* L50: */ } } else if (lsamen_(&c__2, c2, "TB")) { /* Triangular matrix, banded storage */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dtbmv_(uplo, trans, diag, n, kl, &a[a_offset], lda, &b_ref(1, j), &c__1); /* L60: */ } } else { /* If PATH is none of the above, return with an error code. */ *info = -1; i__1 = -(*info); xerbla_("DLARHS", &i__1); } return 0; /* End of DLARHS */ } /* dlarhs_ */
/* Subroutine */ int dgbt02_(char *trans, integer *m, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *a, integer *lda, doublereal * x, integer *ldx, doublereal *b, integer *ldb, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3; doublereal d__1, d__2; /* Local variables */ static integer j; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal anorm, bnorm; static integer i1, i2, n1; static doublereal xnorm; static integer kd; extern doublereal dlamch_(char *); static doublereal eps; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DGBT02 computes the residual for a solution of a banded system of equations A*x = b or A'*x = b: RESID = norm( B - A*X ) / ( norm(A) * norm(X) * EPS). where EPS is the machine precision. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A *x = b = 'T': A'*x = b, where A' is the transpose of A = 'C': A'*x = b, where A' is the transpose of A M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of columns of B. NRHS >= 0. 
A (input) DOUBLE PRECISION array, dimension (LDA,N) The original matrix A in band storage, stored in rows 1 to KL+KU+1. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,KL+KU+1). X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The computed solution vectors for the system of linear equations. LDX (input) INTEGER The leading dimension of the array X. If TRANS = 'N', LDX >= max(1,N); if TRANS = 'T' or 'C', LDX >= max(1,M). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side vectors for the system of linear equations. On exit, B is overwritten with the difference B - A*X. LDB (input) INTEGER The leading dimension of the array B. IF TRANS = 'N', LDB >= max(1,M); if TRANS = 'T' or 'C', LDB >= max(1,N). RESID (output) DOUBLE PRECISION The maximum over the number of right hand sides of norm(B - A*X) / ( norm(A) * norm(X) * EPS ). ===================================================================== Quick return if N = 0 pr NRHS = 0 Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ if (*m <= 0 || *n <= 0 || *nrhs <= 0) { *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if ANORM = 0. */ eps = dlamch_("Epsilon"); kd = *ku + 1; anorm = 0.; i__1 = *n; for (j = 1; j <= i__1; ++j) { /* Computing MAX */ i__2 = kd + 1 - j; i1 = max(i__2,1); /* Computing MIN */ i__2 = kd + *m - j, i__3 = *kl + kd; i2 = min(i__2,i__3); /* Computing MAX */ i__2 = i2 - i1 + 1; d__1 = anorm, d__2 = dasum_(&i__2, &a_ref(i1, j), &c__1); anorm = max(d__1,d__2); /* L10: */ } if (anorm <= 0.) { *resid = 1. 
/ eps; return 0; } if (lsame_(trans, "T") || lsame_(trans, "C")) { n1 = *n; } else { n1 = *m; } /* Compute B - A*X (or B - A'*X ) */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dgbmv_(trans, m, n, kl, ku, &c_b8, &a[a_offset], lda, &x_ref(1, j), & c__1, &c_b10, &b_ref(1, j), &c__1); /* L20: */ } /* Compute the maximum over the number of right hand sides of norm(B - A*X) / ( norm(A) * norm(X) * EPS ). */ *resid = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bnorm = dasum_(&n1, &b_ref(1, j), &c__1); xnorm = dasum_(&n1, &x_ref(1, j), &c__1); if (xnorm <= 0.) { *resid = 1. / eps; } else { /* Computing MAX */ d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps; *resid = max(d__1,d__2); } /* L30: */ } return 0; /* End of DGBT02 */ } /* dgbt02_ */
/* Subroutine */ int dtimmv_(char *vname, integer *nn, integer *nval, integer
        *nk, integer *kval, integer *nlda, integer *ldaval, doublereal *
        timmin, doublereal *a, integer *lb, doublereal *b, doublereal *c__,
        doublereal *reslts, integer *ldr1, integer *ldr2, integer *nout,
        ftnlen vname_len)
{
    /* Initialized data */

    /* Names of the routines this driver can time, 6 characters each. */
    static char subnam[6*2] = "DGEMV " "DGBMV ";

    /* Format strings */
    static char fmt_9999[] = "(1x,a6,\002: Unrecognized path or subroutine "
            "name\002,/)";
    static char fmt_9998[] = "(1x,a6,\002 timing run not attempted\002,/)";
    static char fmt_9997[] = "(/\002 *** Speed of \002,a6,\002 in megaflops "
            "***\002)";
    static char fmt_9996[] = "(5x,\002with LDA = \002,i5)";
    static char fmt_9995[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)";

    /* System generated locals */
    integer reslts_dim1, reslts_dim2, reslts_offset, i__1, i__2, i__3, i__4,
            i__5, i__6, i__7;

    /* Builtin functions (f2c runtime: string copy and formatted /
       list-directed output) */
    /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen);
    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void),
            s_wsle(cilist *), e_wsle(void);

    /* Local variables (static storage, so values persist between calls) */
    static integer ilda, info;
    static doublereal time;
    static integer isub, nrhs, i__, k, n;
    static char cname[6];
    extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer *
            , integer *, doublereal *, doublereal *, integer *, doublereal *,
            integer *, doublereal *, doublereal *, integer *);
    extern logical lsame_(char *, char *);
    extern /* Subroutine */ int dgemv_(char *, integer *, integer *,
            doublereal *, doublereal *, integer *, doublereal *, integer *,
            doublereal *, doublereal *, integer *);
    static doublereal s1, s2;
    extern doublereal dopbl2_(char *, integer *, integer *, integer *, integer
            *);
    static integer ib, ic, ik, in, kl, ku;
    extern doublereal dsecnd_(void);
    extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer
            *, integer *, integer *, integer *, integer *, ftnlen);
    extern logical lsamen_(integer *, char *, char *);
    extern doublereal dmflop_(doublereal *, doublereal *, integer *);
    extern /* Subroutine */ int dtimmg_(integer *, integer *, integer *,
            doublereal *, integer *, integer *, integer *), dprtbl_(char *,
            char *, integer *, integer *, integer *, integer *, integer *,
            doublereal *, integer *, integer *, integer *, ftnlen, ftnlen);
    static doublereal untime;
    /* timsub[0] selects the DGEMV timing, timsub[1] the DGBMV timing. */
    static logical timsub[2];
    static integer lda, ldb, icl;
    static doublereal ops;
    /* Row/column labels handed to the table printer dprtbl_. */
    static char lab1[1], lab2[1];

    /* Fortran I/O blocks (one per formatted write; the unit number is
       filled in from *nout immediately before each use) */
    static cilist io___5 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___9 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___10 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___11 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___13 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___14 = { 0, 0, 0, 0, 0 };

/* subnam_ref(0, i) is the address of the i-th (1-based) 6-character name;
   reslts_ref gives 1-based 3-index access to RESLTS. */
#define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6]
#define reslts_ref(a_1,a_2,a_3) reslts[((a_3)*reslts_dim2 + (a_2))*\
reslts_dim1 + a_1]

/*  -- LAPACK timing routine (version 3.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       March 31, 1993

    Purpose
    =======

    DTIMMV times individual BLAS 2 routines (DGEMV or DGBMV, selected by
    VNAME) and prints a table of the measured speeds.

    Arguments
    =========

    VNAME   (input) CHARACTER*(*)
            The name of the Level 2 BLAS routine to be timed.

    NN      (input) INTEGER
            The number of values of N contained in the vector NVAL.

    NVAL    (input) INTEGER array, dimension (NN)
            The values of the matrix dimension N.

    NK      (input) INTEGER
            The number of values of K contained in the vector KVAL.

    KVAL    (input) INTEGER array, dimension (NK)
            The values of the bandwidth K.

    NLDA    (input) INTEGER
            The number of values of LDA contained in the vector LDAVAL.

    LDAVAL  (input) INTEGER array, dimension (NLDA)
            The values of the leading dimension of the array A.

    TIMMIN  (input) DOUBLE PRECISION
            The minimum time a subroutine will be timed.

    A       (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX)
            where LDAMAX and NMAX are the maximum values permitted
            for LDA and N.

    LB      (input) INTEGER
            The length of B and C, needed when timing DGBMV.  If timing
            DGEMV, LB >= LDAMAX*NMAX.

    B       (workspace) DOUBLE PRECISION array, dimension (LB)

    C       (workspace) DOUBLE PRECISION array, dimension (LB)

    RESLTS  (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,NLDA)
            The timing results for each subroutine over the relevant
            values of N and LDA.

    LDR1    (input) INTEGER
            The first dimension of RESLTS.  LDR1 >= max(1,NK).

    LDR2    (input) INTEGER
            The second dimension of RESLTS.  LDR2 >= max(1,NN).

    NOUT    (input) INTEGER
            The unit number for output.

    VNAME_LEN (input) ftnlen
            Length of the VNAME character argument (f2c hidden string
            length); used when copying and printing VNAME.

    Notes
    =====

    NOTE(review): the DGBMV branch stores into RESLTS(in, ik, ilda), i.e.
    the first index runs over the NN values and the second over the NK
    values.  That looks like the reverse of the LDR1 >= NK / LDR2 >= NN
    bounds stated above -- confirm against the callers' array shapes.

    c__0, c__1, c__2, c__6 and c_b44 are file-scope constants that are not
    declared in this routine; presumably the integers 0, 1, 2, 6 and the
    double 1.0 -- verify against the file header.

    =====================================================================
*/

    /* Parameter adjustments: shift each array base so 1-based indexing
       works as in the Fortran original. */
    --nval;
    --kval;
    --ldaval;
    --a;
    --b;
    --c__;
    reslts_dim1 = *ldr1;
    reslts_dim2 = *ldr2;
    reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * 1);
    reslts -= reslts_offset;

    /* Function Body */

    /* Copy the requested name into a 6-character buffer and look it up in
       subnam.  The search stops at the first match, so timsub[1] is only
       assigned when timsub[0] came out false. */
    s_copy(cname, vname, (ftnlen)6, vname_len);
    for (isub = 1; isub <= 2; ++isub) {
        timsub[isub - 1] = lsamen_(&c__6, cname, subnam_ref(0, isub));
        if (timsub[isub - 1]) {
            goto L20;
        }
/* L10: */
    }

    /* No match: report the unrecognized name and leave. */
    io___5.ciunit = *nout;
    s_wsfe(&io___5);
    do_fio(&c__1, cname, (ftnlen)6);
    e_wsfe();
    goto L150;
L20:

    /* Check that N or K <= LDA for the input values.  The third character
       of the name ('B' for the banded routine) decides whether the K
       values or the N values are checked, and which labels the result
       table uses. */

    if (lsame_(cname + 2, "B")) {
        atimck_(&c__0, cname, nk, &kval[1], nlda, &ldaval[1], nout, &info, (
                ftnlen)6);
        *(unsigned char *)lab1 = 'M';
        *(unsigned char *)lab2 = 'K';
    } else {
        atimck_(&c__2, cname, nn, &nval[1], nlda, &ldaval[1], nout, &info, (
                ftnlen)6);
        *(unsigned char *)lab1 = ' ';
        *(unsigned char *)lab2 = 'N';
    }
    if (info > 0) {
        /* The input check flagged a problem: print the "timing run not
           attempted" message and leave. */
        io___9.ciunit = *nout;
        s_wsfe(&io___9);
        do_fio(&c__1, cname, (ftnlen)6);
        e_wsfe();
        goto L150;
    }

    /* Print the table header on unit NOUT. */

    io___10.ciunit = *nout;
    s_wsfe(&io___10);
    do_fio(&c__1, vname, vname_len);
    e_wsfe();
    if (*nlda == 1) {
        /* Single LDA value: print it on one line. */
        io___11.ciunit = *nout;
        s_wsfe(&io___11);
        do_fio(&c__1, (char *)&ldaval[1], (ftnlen)sizeof(integer));
        e_wsfe();
    } else {
        /* Several LDA values: print one numbered line per value. */
        i__1 = *nlda;
        for (i__ = 1; i__ <= i__1; ++i__) {
            io___13.ciunit = *nout;
            s_wsfe(&io___13);
            do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer));
            do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer));
            e_wsfe();
/* L30: */
        }
    }
    /* Empty list-directed write (emits a blank record). */
    io___14.ciunit = *nout;
    s_wsle(&io___14);
    e_wsle();

    /* Time DGEMV */

    if (timsub[0]) {
        i__1 = *nlda;
        for (ilda = 1; ilda <= i__1; ++ilda) {
            lda = ldaval[ilda];
            i__2 = *nn;
            for (in = 1; in <= i__2; ++in) {
                n = nval[in];
                /* One matrix-vector product per "right hand side"; the
                   vectors are stored LDB apart in B and C. */
                nrhs = n;
                ldb = lda;
                /* Generate the test data: A, the input vectors B and the
                   in/out vectors C. */
                dtimmg_(&c__1, &n, &n, &a[1], &lda, &c__0, &c__0);
                dtimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0);
                dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0);
                /* ic counts timed passes.  s1 is taken once and is NOT
                   reset on repeat, so `time` is the cumulative elapsed
                   time, including the dtimmg_ calls between passes. */
                ic = 0;
                s1 = dsecnd_();
L40:
                ib = 1;
                i__3 = nrhs;
                for (i__ = 1; i__ <= i__3; ++i__) {
                    dgemv_("No transpose", &n, &n, &c_b44, &a[1], &lda, &b[ib]
                            , &c__1, &c_b44, &c__[ib], &c__1);
                    ib += ldb;
/* L50: */
                }
                s2 = dsecnd_();
                time = s2 - s1;
                ++ic;
                if (time < *timmin) {
                    /* Not enough elapsed time yet: regenerate C (it was
                       overwritten by the products) and run another pass. */
                    dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0);
                    goto L40;
                }

                /* Subtract the time used in DTIMMG.  The loop below
                   repeats the same ic-1 regenerations of C that were
                   interleaved with the timed passes and measures them
                   alone. */

                icl = 1;
                s1 = dsecnd_();
L60:
                s2 = dsecnd_();
                untime = s2 - s1;
                ++icl;
                if (icl <= ic) {
                    dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0);
                    goto L60;
                }

                /* Average time per pass, operation count for nrhs
                   products, and the resulting speed for this (N, LDA). */
                time = (time - untime) / (doublereal) ic;
                ops = nrhs * dopbl2_("DGEMV ", &n, &n, &c__0, &c__0);
                reslts_ref(1, in, ilda) = dmflop_(&ops, &time, &c__0);
/* L70: */
            }
/* L80: */
        }

        /* Print the DGEMV results: a single row, one column per N. */
        dprtbl_(lab1, lab2, &c__1, &nval[1], nn, &nval[1], nlda, &reslts[
                reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1);

    } else if (timsub[1]) {

        /* Time DGBMV */

        i__1 = *nlda;
        for (ilda = 1; ilda <= i__1; ++ilda) {
            lda = ldaval[ilda];
            i__2 = *nn;
            for (in = 1; in <= i__2; ++in) {
                n = nval[in];
                i__3 = *nk;
                for (ik = 1; ik <= i__3; ++ik) {
                    /* Bandwidth actually used: k = min(n-1, max(0, KVAL(ik))),
                       applied to both the sub- and superdiagonal count. */
/* Computing MIN */
/* Computing MAX */
                    i__6 = 0, i__7 = kval[ik];
                    i__4 = n - 1, i__5 = max(i__6,i__7);
                    k = min(i__4,i__5);
                    kl = k;
                    ku = k;
                    ldb = n;
                    dtimmg_(&c__2, &n, &n, &a[1], &lda, &kl, &ku);
                    /* Number of products per pass: at most k, and at most
                       as many length-n vectors as fit in B/C (LB / n).
                       NOTE(review): ldb == n here, so n == 0 would divide
                       by zero -- presumably callers only pass n >= 1. */
/* Computing MIN */
                    i__4 = k, i__5 = *lb / ldb;
                    nrhs = min(i__4,i__5);
                    dtimmg_(&c__0, &n, &nrhs, &b[1], &ldb, &c__0, &c__0);
                    dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0);
                    /* Same cumulative-timing scheme as the DGEMV branch:
                       s1 is not reset between passes. */
                    ic = 0;
                    s1 = dsecnd_();
L90:
                    ib = 1;
                    i__4 = nrhs;
                    for (i__ = 1; i__ <= i__4; ++i__) {
                        dgbmv_("No transpose", &n, &n, &kl, &ku, &c_b44, &a[
                                ku + 1], &lda, &b[ib], &c__1, &c_b44, &c__[ib]
                                , &c__1);
                        ib += ldb;
/* L100: */
                    }
                    s2 = dsecnd_();
                    time = s2 - s1;
                    ++ic;
                    if (time < *timmin) {
                        /* Regenerate C and repeat until TIMMIN is reached. */
                        dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0)
                                ;
                        goto L90;
                    }

                    /* Subtract the time used in DTIMMG (the ic-1
                       regenerations of C are replayed on their own). */

                    icl = 1;
                    s1 = dsecnd_();
L110:
                    s2 = dsecnd_();
                    untime = s2 - s1;
                    ++icl;
                    if (icl <= ic) {
                        dtimmg_(&c__1, &n, &nrhs, &c__[1], &ldb, &c__0, &c__0)
                                ;
                        goto L110;
                    }

                    /* Average time per pass and resulting speed for this
                       (N, K, LDA) combination. */
                    time = (time - untime) / (doublereal) ic;
                    ops = nrhs * dopbl2_("DGBMV ", &n, &n, &kl, &ku);
                    reslts_ref(in, ik, ilda) = dmflop_(&ops, &time, &c__0);
/* L120: */
                }
/* L130: */
            }
/* L140: */
        }

        /* Print the DGBMV results, indexed by the N values and K values. */
        dprtbl_(lab1, lab2, nn, &nval[1], nk, &kval[1], nlda, &reslts[
                reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1);
    }

L150:
    return 0;

/*     End of DTIMMV */

} /* dtimmv_ */
/* Subroutine */ int dgbrfs_(char *trans, integer *n, integer *kl, integer * ku, integer *nrhs, doublereal *ab, integer *ldab, doublereal *afb, integer *ldafb, integer *ipiv, doublereal *b, integer *ldb, doublereal *x, integer *ldx, doublereal *ferr, doublereal *berr, doublereal *work, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DGBRFS improves the computed solution to a system of linear equations when the coefficient matrix is banded, and provides error bounds and backward error estimates for the solution. Arguments ========= TRANS (input) CHARACTER*1 Specifies the form of the system of equations: = 'N': A * X = B (No transpose) = 'T': A**T * X = B (Transpose) = 'C': A**H * X = B (Conjugate transpose = Transpose) N (input) INTEGER The order of the matrix A. N >= 0. KL (input) INTEGER The number of subdiagonals within the band of A. KL >= 0. KU (input) INTEGER The number of superdiagonals within the band of A. KU >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrices B and X. NRHS >= 0. AB (input) DOUBLE PRECISION array, dimension (LDAB,N) The original band matrix A, stored in rows 1 to KL+KU+1. The j-th column of A is stored in the j-th column of the array AB as follows: AB(ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(n,j+kl). LDAB (input) INTEGER The leading dimension of the array AB. LDAB >= KL+KU+1. AFB (input) DOUBLE PRECISION array, dimension (LDAFB,N) Details of the LU factorization of the band matrix A, as computed by DGBTRF. U is stored as an upper triangular band matrix with KL+KU superdiagonals in rows 1 to KL+KU+1, and the multipliers used during the factorization are stored in rows KL+KU+2 to 2*KL+KU+1. LDAFB (input) INTEGER The leading dimension of the array AFB. LDAFB >= 2*KL*KU+1. 
IPIV (input) INTEGER array, dimension (N) The pivot indices from DGBTRF; for 1<=i<=N, row i of the matrix was interchanged with row IPIV(i). B (input) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). X (input/output) DOUBLE PRECISION array, dimension (LDX,NRHS) On entry, the solution matrix X, as computed by DGBTRS. On exit, the improved solution matrix X. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). FERR (output) DOUBLE PRECISION array, dimension (NRHS) The estimated forward error bound for each solution vector X(j) (the j-th column of the solution matrix X). If XTRUE is the true solution corresponding to X(j), FERR(j) is an estimated upper bound for the magnitude of the largest element in (X(j) - XTRUE) divided by the magnitude of the largest element in X(j). The estimate is as reliable as the estimate for RCOND, and is almost always a slight overestimate of the true error. BERR (output) DOUBLE PRECISION array, dimension (NRHS) The componentwise relative backward error of each solution vector X(j) (i.e., the smallest relative change in any element of A or B that makes X(j) an exact solution). WORK (workspace) DOUBLE PRECISION array, dimension (3*N) IWORK (workspace) INTEGER array, dimension (N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value Internal Parameters =================== ITMAX is the maximum number of steps of iterative refinement. ===================================================================== Test the input parameters. 
Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static doublereal c_b15 = -1.; static doublereal c_b17 = 1.; /* System generated locals */ integer ab_dim1, ab_offset, afb_dim1, afb_offset, b_dim1, b_offset, x_dim1, x_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7; doublereal d__1, d__2, d__3; /* Local variables */ static integer kase; static doublereal safe1, safe2; static integer i__, j, k; static doublereal s; extern /* Subroutine */ int dgbmv_(char *, integer *, integer *, integer * , integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static integer count, kk; extern doublereal dlamch_(char *); extern /* Subroutine */ int dlacon_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static doublereal xk; static integer nz; static doublereal safmin; extern /* Subroutine */ int xerbla_(char *, integer *), dgbtrs_( char *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, doublereal *, integer *, integer *); static logical notran; static char transt[1]; static doublereal lstres, eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] #define ab_ref(a_1,a_2) ab[(a_2)*ab_dim1 + a_1] ab_dim1 = *ldab; ab_offset = 1 + ab_dim1 * 1; ab -= ab_offset; afb_dim1 = *ldafb; afb_offset = 1 + afb_dim1 * 1; afb -= afb_offset; --ipiv; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; --ferr; --berr; --work; --iwork; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); if (! notran && ! lsame_(trans, "T") && ! 
lsame_( trans, "C")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*kl < 0) { *info = -3; } else if (*ku < 0) { *info = -4; } else if (*nrhs < 0) { *info = -5; } else if (*ldab < *kl + *ku + 1) { *info = -7; } else if (*ldafb < (*kl << 1) + *ku + 1) { *info = -9; } else if (*ldb < max(1,*n)) { *info = -12; } else if (*ldx < max(1,*n)) { *info = -14; } if (*info != 0) { i__1 = -(*info); xerbla_("DGBRFS", &i__1); return 0; } /* Quick return if possible */ if (*n == 0 || *nrhs == 0) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { ferr[j] = 0.; berr[j] = 0.; /* L10: */ } return 0; } if (notran) { *(unsigned char *)transt = 'T'; } else { *(unsigned char *)transt = 'N'; } /* NZ = maximum number of nonzero elements in each row of A, plus 1 Computing MIN */ i__1 = *kl + *ku + 2, i__2 = *n + 1; nz = min(i__1,i__2); eps = dlamch_("Epsilon"); safmin = dlamch_("Safe minimum"); safe1 = nz * safmin; safe2 = safe1 / eps; /* Do for each right hand side */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { count = 1; lstres = 3.; L20: /* Loop until stopping criterion is satisfied. Compute residual R = B - op(A) * X, where op(A) = A, A**T, or A**H, depending on TRANS. */ dcopy_(n, &b_ref(1, j), &c__1, &work[*n + 1], &c__1); dgbmv_(trans, n, n, kl, ku, &c_b15, &ab[ab_offset], ldab, &x_ref(1, j) , &c__1, &c_b17, &work[*n + 1], &c__1); /* Compute componentwise relative backward error from formula max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) ) where abs(Z) is the componentwise absolute value of the matrix or vector Z. If the i-th component of the denominator is less than SAFE2, then SAFE1 is added to the i-th components of the numerator and denominator before dividing. */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = (d__1 = b_ref(i__, j), abs(d__1)); /* L30: */ } /* Compute abs(op(A))*abs(X) + abs(B). 
*/ if (notran) { i__2 = *n; for (k = 1; k <= i__2; ++k) { kk = *ku + 1 - k; xk = (d__1 = x_ref(k, j), abs(d__1)); /* Computing MAX */ i__3 = 1, i__4 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__5 = min(i__6,i__7); for (i__ = max(i__3,i__4); i__ <= i__5; ++i__) { work[i__] += (d__1 = ab_ref(kk + i__, k), abs(d__1)) * xk; /* L40: */ } /* L50: */ } } else { i__2 = *n; for (k = 1; k <= i__2; ++k) { s = 0.; kk = *ku + 1 - k; /* Computing MAX */ i__5 = 1, i__3 = k - *ku; /* Computing MIN */ i__6 = *n, i__7 = k + *kl; i__4 = min(i__6,i__7); for (i__ = max(i__5,i__3); i__ <= i__4; ++i__) { s += (d__1 = ab_ref(kk + i__, k), abs(d__1)) * (d__2 = x_ref(i__, j), abs(d__2)); /* L60: */ } work[k] += s; /* L70: */ } } s = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { /* Computing MAX */ d__2 = s, d__3 = (d__1 = work[*n + i__], abs(d__1)) / work[ i__]; s = max(d__2,d__3); } else { /* Computing MAX */ d__2 = s, d__3 = ((d__1 = work[*n + i__], abs(d__1)) + safe1) / (work[i__] + safe1); s = max(d__2,d__3); } /* L80: */ } berr[j] = s; /* Test stopping criterion. Continue iterating if 1) The residual BERR(J) is larger than machine epsilon, and 2) BERR(J) decreased by at least a factor of 2 during the last iteration, and 3) At most ITMAX iterations tried. */ if (berr[j] > eps && berr[j] * 2. <= lstres && count <= 5) { /* Update solution and try again. */ dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, &ipiv[1] , &work[*n + 1], n, info); daxpy_(n, &c_b17, &work[*n + 1], &c__1, &x_ref(1, j), &c__1); lstres = berr[j]; ++count; goto L20; } /* Bound error from formula norm(X - XTRUE) / norm(X) .le. 
FERR = norm( abs(inv(op(A)))* ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X) where norm(Z) is the magnitude of the largest component of Z inv(op(A)) is the inverse of op(A) abs(Z) is the componentwise absolute value of the matrix or vector Z NZ is the maximum number of nonzeros in any row of A, plus 1 EPS is machine epsilon The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B)) is incremented by SAFE1 if the i-th component of abs(op(A))*abs(X) + abs(B) is less than SAFE2. Use DLACON to estimate the infinity-norm of the matrix inv(op(A)) * diag(W), where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { if (work[i__] > safe2) { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__]; } else { work[i__] = (d__1 = work[*n + i__], abs(d__1)) + nz * eps * work[i__] + safe1; } /* L90: */ } kase = 0; L100: dlacon_(n, &work[(*n << 1) + 1], &work[*n + 1], &iwork[1], &ferr[j], & kase); if (kase != 0) { if (kase == 1) { /* Multiply by diag(W)*inv(op(A)**T). */ dgbtrs_(transt, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L110: */ } } else { /* Multiply by inv(op(A))*diag(W). */ i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { work[*n + i__] *= work[i__]; /* L120: */ } dgbtrs_(trans, n, kl, ku, &c__1, &afb[afb_offset], ldafb, & ipiv[1], &work[*n + 1], n, info); } goto L100; } /* Normalize error. */ lstres = 0.; i__2 = *n; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = lstres, d__3 = (d__1 = x_ref(i__, j), abs(d__1)); lstres = max(d__2,d__3); /* L130: */ } if (lstres != 0.) { ferr[j] /= lstres; } /* L140: */ } return 0; /* End of DGBRFS */ } /* dgbrfs_ */