/* * transform ket, s2 to label AO symmetry */ int AO2MOmmm_ket_nr_s2(double *vout, double *vin, double *buf, struct _AO2MOEnvs *envs, int seekdim) { switch (seekdim) { case OUTPUTIJ: return envs->nao * envs->ket_count; case INPUT_IJ: return envs->nao * (envs->nao+1) / 2; } const double D0 = 0; const double D1 = 1; const char SIDE_L = 'L'; const char UPLO_U = 'U'; int nao = envs->nao; int j_start = envs->ket_start; int j_count = envs->ket_count; double *mo_coeff = envs->mo_coeff; int i, j; dsymm_(&SIDE_L, &UPLO_U, &nao, &j_count, &D1, vin, &nao, mo_coeff+j_start*nao, &nao, &D0, buf, &nao); for (j = 0; j < nao; j++) { for (i = 0; i < j_count; i++) { vout[i] = buf[i*nao+j]; } vout += j_count; } return 0; }
/* * s2-AO integrals to s1-MO integrals, efficient for i_count > j_count * shape requirements: * vout[:,bra_count*ket_count], eri[:,nao*(nao+1)/2] */ int AO2MOmmm_nr_s2_igtj(double *vout, double *eri, struct _AO2MOEnvs *envs, int seekdim) { switch (seekdim) { case 1: return envs->bra_count * envs->ket_count; case 2: return envs->nao * (envs->nao+1) / 2; } const double D0 = 0; const double D1 = 1; const char SIDE_L = 'L'; const char UPLO_U = 'U'; const char TRANS_T = 'T'; const char TRANS_N = 'N'; int nao = envs->nao; int i_start = envs->bra_start; int i_count = envs->bra_count; int j_start = envs->ket_start; int j_count = envs->ket_count; double *mo_coeff = envs->mo_coeff; double *buf = malloc(sizeof(double)*nao*j_count); // C_qj (pq| = (pj|, where (pq| is in C-order dsymm_(&SIDE_L, &UPLO_U, &nao, &j_count, &D1, eri, &nao, mo_coeff+j_start*nao, &nao, &D0, buf, &nao); // C_pi (pj| = (ij| dgemm_(&TRANS_T, &TRANS_N, &j_count, &i_count, &nao, &D1, buf, &nao, mo_coeff+i_start*nao, &nao, &D0, vout, &j_count); free(buf); return 0; }
int f2c_dsymm(char* side, char* uplo, integer* M, integer* N, doublereal* alpha, doublereal* A, integer* lda, doublereal* B, integer* ldb, doublereal* beta, doublereal* C, integer* ldc) { dsymm_(side, uplo, M, N, alpha, A, lda, B, ldb, beta, C, ldc); return 0; }
void dsymm(const SIDE Side, const UPLO Uplo, const int M, const int N, const double alpha, const double *A, const int lda, const double *B, const int ldb, const double beta, double *C, const int ldc) { dsymm_(SideChar[Side], UploChar[Uplo], &M, &N, &alpha, A, &lda, B, &ldb, &beta, C, &ldc); }
void la_dsymm(int Alhs, int Arow, int Acol, int Brow, int Bcol, int Crow, int Ccol, double **A, double **B, double **C, double alpha, double beta) { int m, n, lda, ldb, ldc; char Atri = 'L'; // reference the lower triangle of column major A. char Aside; assert(Arow=Acol); m = Crow; n = Ccol; lda = Arow; ldb = Brow; ldc = Crow; assert(ldb == m); if(Alhs){ Aside='L'; assert(lda == m); } else{ Aside='R'; assert(lda == n); } dsymm_(&Aside,&Atri,&m,&n,&alpha,*A,&lda,*B,&ldb,&beta,*C,&ldc); }
/* * transform bra, s2 to label AO symmetry */ int AO2MOmmm_bra_nr_s2(double *vout, double *vin, double *buf, struct _AO2MOEnvs *envs, int seekdim) { switch (seekdim) { case OUTPUTIJ: return envs->bra_count * envs->nao; case INPUT_IJ: return envs->nao * (envs->nao+1) / 2; } const double D0 = 0; const double D1 = 1; const char SIDE_L = 'L'; const char UPLO_U = 'U'; int nao = envs->nao; int i_start = envs->bra_start; int i_count = envs->bra_count; double *mo_coeff = envs->mo_coeff; dsymm_(&SIDE_L, &UPLO_U, &nao, &i_count, &D1, vin, &nao, mo_coeff+i_start*nao, &nao, &D0, vout, &nao); return 0; }
/* * s2-AO integrals to s2-MO integrals * shape requirements: * vout[:,bra_count*(bra_count+1)/2] and bra_count==ket_count, * eri[:,nao*(nao+1)/2] * first s2 is the AO symmetry, second s2 is the MO symmetry */ int AO2MOmmm_nr_s2_s2(double *vout, double *eri, struct _AO2MOEnvs *envs, int seekdim) { switch (seekdim) { case 1: assert(envs->bra_count == envs->ket_count); return envs->bra_count * (envs->bra_count+1) / 2; case 2: return envs->nao * (envs->nao+1) / 2; } const double D0 = 0; const double D1 = 1; const char SIDE_L = 'L'; const char UPLO_U = 'U'; int nao = envs->nao; int i_start = envs->bra_start; int i_count = envs->bra_count; int j_start = envs->ket_start; int j_count = envs->ket_count; double *mo_coeff = envs->mo_coeff; double *buf = malloc(sizeof(double)*(nao*i_count+i_count*j_count)); double *buf1 = buf + nao*i_count; int i, j, ij; // C_pi (pq| = (iq|, where (pq| is in C-order dsymm_(&SIDE_L, &UPLO_U, &nao, &i_count, &D1, eri, &nao, mo_coeff+i_start*nao, &nao, &D0, buf, &nao); AO2MOdtriumm_o1(j_count, i_count, nao, 0, mo_coeff+j_start*nao, buf, buf1); for (i = 0, ij = 0; i < i_count; i++) { for (j = 0; j <= i; j++, ij++) { vout[ij] = buf1[j]; } buf1 += j_count; } free(buf); return 0; }
/* Subroutine */ int dpot03_(char *uplo, integer *n, doublereal *a, integer * lda, doublereal *ainv, integer *ldainv, doublereal *work, integer * ldwork, doublereal *rwork, doublereal *rcond, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, ainv_dim1, ainv_offset, work_dim1, work_offset, i__1, i__2; /* Local variables */ integer i__, j; doublereal eps; extern logical lsame_(char *, char *); doublereal anorm; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); doublereal ainvnm; extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); /* -- LAPACK test routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOT03 computes the residual for a symmetric matrix times its */ /* inverse: */ /* norm( I - A*AINV ) / ( N * norm(A) * norm(AINV) * EPS ), */ /* where EPS is the machine epsilon. */ /* Arguments */ /* ========== */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The number of rows and columns of the matrix A. N >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The original symmetric matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N) */ /* AINV (input/output) DOUBLE PRECISION array, dimension (LDAINV,N) */ /* On entry, the inverse of the matrix A, stored as a symmetric */ /* matrix in the same format as A. */ /* In this version, AINV is expanded into a full matrix and */ /* multiplied by A, so the opposing triangle of AINV will be */ /* changed; i.e., if the upper triangular part of AINV is */ /* stored, the lower triangular part will be used as work space. */ /* LDAINV (input) INTEGER */ /* The leading dimension of the array AINV. LDAINV >= max(1,N). */ /* WORK (workspace) DOUBLE PRECISION array, dimension (LDWORK,N) */ /* LDWORK (input) INTEGER */ /* The leading dimension of the array WORK. LDWORK >= max(1,N). */ /* RWORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* RCOND (output) DOUBLE PRECISION */ /* The reciprocal of the condition number of A, computed as */ /* ( 1/norm(A) ) / norm(AINV). */ /* RESID (output) DOUBLE PRECISION */ /* norm(I - A*AINV) / ( N * norm(A) * norm(AINV) * EPS ) */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick exit if N = 0. */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; ainv_dim1 = *ldainv; ainv_offset = 1 + ainv_dim1; ainv -= ainv_offset; work_dim1 = *ldwork; work_offset = 1 + work_dim1; work -= work_offset; --rwork; /* Function Body */ if (*n <= 0) { *rcond = 1.; *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if ANORM = 0 or AINVNM = 0. */ eps = dlamch_("Epsilon"); anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]); ainvnm = dlansy_("1", uplo, n, &ainv[ainv_offset], ldainv, &rwork[1]); if (anorm <= 0. || ainvnm <= 0.) { *rcond = 0.; *resid = 1. / eps; return 0; } *rcond = 1. / anorm / ainvnm; /* Expand AINV into a full matrix and call DSYMM to multiply */ /* AINV on the left by A. */ if (lsame_(uplo, "U")) { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { ainv[j + i__ * ainv_dim1] = ainv[i__ + j * ainv_dim1]; /* L10: */ } /* L20: */ } } else { i__1 = *n; for (j = 1; j <= i__1; ++j) { i__2 = *n; for (i__ = j + 1; i__ <= i__2; ++i__) { ainv[j + i__ * ainv_dim1] = ainv[i__ + j * ainv_dim1]; /* L30: */ } /* L40: */ } } dsymm_("Left", uplo, n, n, &c_b11, &a[a_offset], lda, &ainv[ainv_offset], ldainv, &c_b12, &work[work_offset], ldwork); /* Add the identity matrix to WORK . */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { work[i__ + i__ * work_dim1] += 1.; /* L50: */ } /* Compute norm(I - A*AINV) / (N * norm(A) * norm(AINV) * EPS) */ *resid = dlange_("1", n, n, &work[work_offset], ldwork, &rwork[1]); *resid = *resid * *rcond / eps / (doublereal) (*n); return 0; /* End of DPOT03 */ } /* dpot03_ */
/* Subroutine */ int dlarhs_(char *path, char *xtype, char *uplo, char *trans, integer *m, integer *n, integer *kl, integer *ku, integer *nrhs, doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal * b, integer *ldb, integer *iseed, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); /* Local variables */ static logical band; static char diag[1]; static logical tran; static integer j; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dgbmv_(char *, integer *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern logical lsame_(char *, char *); extern /* Subroutine */ int dsbmv_(char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtbmv_(char *, char *, char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *); static char c1[1], c2[2]; extern /* Subroutine */ int dspmv_(char *, integer *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, doublereal *, integer *), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dtpmv_( char *, char *, char *, integer *, doublereal *, doublereal *, integer *); static integer mb, nx; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); extern logical lsamen_(integer *, char *, char *); extern /* Subroutine */ int dlarnv_(integer *, integer *, integer *, doublereal *); static logical notran, gen, tri, qrs, sym; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DLARHS chooses a set of NRHS random solution vectors and sets up the right hand sides for the linear system op( A ) * X = B, where op( A ) may be A or A' (transpose of A). Arguments ========= PATH (input) CHARACTER*3 The type of the real matrix A. PATH may be given in any combination of upper and lower case. Valid types include xGE: General m x n matrix xGB: General banded matrix xPO: Symmetric positive definite, 2-D storage xPP: Symmetric positive definite packed xPB: Symmetric positive definite banded xSY: Symmetric indefinite, 2-D storage xSP: Symmetric indefinite packed xSB: Symmetric indefinite banded xTR: Triangular xTP: Triangular packed xTB: Triangular banded xQR: General m x n matrix xLQ: General m x n matrix xQL: General m x n matrix xRQ: General m x n matrix where the leading character indicates the precision. XTYPE (input) CHARACTER*1 Specifies how the exact solution X will be determined: = 'N': New solution; generate a random X. = 'C': Computed; use value of X on entry. UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the matrix A is stored, if A is symmetric. = 'U': Upper triangular = 'L': Lower triangular TRANS (input) CHARACTER*1 Specifies the operation applied to the matrix A. = 'N': System is A * x = b = 'T': System is A'* x = b = 'C': System is A'* x = b M (input) INTEGER The number or rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0. KL (input) INTEGER Used only if A is a band matrix; specifies the number of subdiagonals of A if A is a general band matrix or if A is symmetric or triangular and UPLO = 'L'; specifies the number of superdiagonals of A if A is symmetric or triangular and UPLO = 'U'. 0 <= KL <= M-1. KU (input) INTEGER Used only if A is a general band matrix or if A is triangular. If PATH = xGB, specifies the number of superdiagonals of A, and 0 <= KU <= N-1. If PATH = xTR, xTP, or xTB, specifies whether or not the matrix has unit diagonal: = 1: matrix has non-unit diagonal (default) = 2: matrix has unit diagonal NRHS (input) INTEGER The number of right hand side vectors in the system A*X = B. A (input) DOUBLE PRECISION array, dimension (LDA,N) The test matrix whose type is given by PATH. LDA (input) INTEGER The leading dimension of the array A. If PATH = xGB, LDA >= KL+KU+1. If PATH = xPB, xSB, xHB, or xTB, LDA >= KL+1. Otherwise, LDA >= max(1,M). X (input or output) DOUBLE PRECISION array, dimension(LDX,NRHS) On entry, if XTYPE = 'C' (for 'Computed'), then X contains the exact solution to the system of linear equations. On exit, if XTYPE = 'N' (for 'New'), then X is initialized with random values. LDX (input) INTEGER The leading dimension of the array X. If TRANS = 'N', LDX >= max(1,N); if TRANS = 'T', LDX >= max(1,M). B (output) DOUBLE PRECISION array, dimension (LDB,NRHS) The right hand side vector(s) for the system of equations, computed from B = op(A) * X, where op(A) is determined by TRANS. LDB (input) INTEGER The leading dimension of the array B. If TRANS = 'N', LDB >= max(1,M); if TRANS = 'T', LDB >= max(1,N). ISEED (input/output) INTEGER array, dimension (4) The seed vector for the random number generator (used in DLATMS). Modified on exit. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --iseed; /* Function Body */ *info = 0; *(unsigned char *)c1 = *(unsigned char *)path; s_copy(c2, path + 1, (ftnlen)2, (ftnlen)2); tran = lsame_(trans, "T") || lsame_(trans, "C"); notran = ! tran; gen = lsame_(path + 1, "G"); qrs = lsame_(path + 1, "Q") || lsame_(path + 2, "Q"); sym = lsame_(path + 1, "P") || lsame_(path + 1, "S"); tri = lsame_(path + 1, "T"); band = lsame_(path + 2, "B"); if (! lsame_(c1, "Double precision")) { *info = -1; } else if (! (lsame_(xtype, "N") || lsame_(xtype, "C"))) { *info = -2; } else if ((sym || tri) && ! (lsame_(uplo, "U") || lsame_(uplo, "L"))) { *info = -3; } else if ((gen || qrs) && ! (tran || lsame_(trans, "N"))) { *info = -4; } else if (*m < 0) { *info = -5; } else if (*n < 0) { *info = -6; } else if (band && *kl < 0) { *info = -7; } else if (band && *ku < 0) { *info = -8; } else if (*nrhs < 0) { *info = -9; } else if (! band && *lda < max(1,*m) || band && (sym || tri) && *lda < * kl + 1 || band && gen && *lda < *kl + *ku + 1) { *info = -11; } else if (notran && *ldx < max(1,*n) || tran && *ldx < max(1,*m)) { *info = -13; } else if (notran && *ldb < max(1,*m) || tran && *ldb < max(1,*n)) { *info = -15; } if (*info != 0) { i__1 = -(*info); xerbla_("DLARHS", &i__1); return 0; } /* Initialize X to NRHS random vectors unless XTYPE = 'C'. */ if (tran) { nx = *m; mb = *n; } else { nx = *n; mb = *m; } if (! lsame_(xtype, "C")) { i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dlarnv_(&c__2, &iseed[1], n, &x_ref(1, j)); /* L10: */ } } /* Multiply X by op( A ) using an appropriate matrix multiply routine. */ if (lsamen_(&c__2, c2, "GE") || lsamen_(&c__2, c2, "QR") || lsamen_(&c__2, c2, "LQ") || lsamen_(&c__2, c2, "QL") || lsamen_(&c__2, c2, "RQ")) { /* General matrix */ dgemm_(trans, "N", &mb, nrhs, &nx, &c_b32, &a[a_offset], lda, &x[ x_offset], ldx, &c_b33, &b[b_offset], ldb); } else if (lsamen_(&c__2, c2, "PO") || lsamen_(& c__2, c2, "SY")) { /* Symmetric matrix, 2-D storage */ dsymm_("Left", uplo, n, nrhs, &c_b32, &a[a_offset], lda, &x[x_offset], ldx, &c_b33, &b[b_offset], ldb); } else if (lsamen_(&c__2, c2, "GB")) { /* General matrix, band storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dgbmv_(trans, &mb, &nx, kl, ku, &c_b32, &a[a_offset], lda, &x_ref( 1, j), &c__1, &c_b33, &b_ref(1, j), &c__1); /* L20: */ } } else if (lsamen_(&c__2, c2, "PB")) { /* Symmetric matrix, band storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dsbmv_(uplo, n, kl, &c_b32, &a[a_offset], lda, &x_ref(1, j), & c__1, &c_b33, &b_ref(1, j), &c__1); /* L30: */ } } else if (lsamen_(&c__2, c2, "PP") || lsamen_(& c__2, c2, "SP")) { /* Symmetric matrix, packed storage */ i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dspmv_(uplo, n, &c_b32, &a[a_offset], &x_ref(1, j), &c__1, &c_b33, &b_ref(1, j), &c__1); /* L40: */ } } else if (lsamen_(&c__2, c2, "TR")) { /* Triangular matrix. Note that for triangular matrices, KU = 1 => non-unit triangular KU = 2 => unit triangular */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } dtrmm_("Left", uplo, trans, diag, n, nrhs, &c_b32, &a[a_offset], lda, &b[b_offset], ldb) ; } else if (lsamen_(&c__2, c2, "TP")) { /* Triangular matrix, packed storage */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dtpmv_(uplo, trans, diag, n, &a[a_offset], &b_ref(1, j), &c__1); /* L50: */ } } else if (lsamen_(&c__2, c2, "TB")) { /* Triangular matrix, banded storage */ dlacpy_("Full", n, nrhs, &x[x_offset], ldx, &b[b_offset], ldb); if (*ku == 2) { *(unsigned char *)diag = 'U'; } else { *(unsigned char *)diag = 'N'; } i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { dtbmv_(uplo, trans, diag, n, kl, &a[a_offset], lda, &b_ref(1, j), &c__1); /* L60: */ } } else { /* If PATH is none of the above, return with an error code. */ *info = -1; i__1 = -(*info); xerbla_("DLARHS", &i__1); } return 0; /* End of DLARHS */ } /* dlarhs_ */
/* Subroutine */ int dpot06_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal * b, integer *ldb, doublereal *rwork, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; doublereal d__1, d__2; /* Local variables */ integer j; doublereal eps; integer ifail; doublereal anorm, bnorm; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); doublereal xnorm; extern doublereal dlamch_(char *); extern integer idamax_(integer *, doublereal *, integer *); extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); /* -- LAPACK test routine (version 3.1.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* April 2007 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DPOT06 computes the residual for a solution of a system of linear */ /* equations A*x = b : */ /* RESID = norm(B - A*X,inf) / ( norm(A,inf) * norm(X,inf) * EPS ), */ /* where EPS is the machine epsilon. */ /* Arguments */ /* ========= */ /* UPLO (input) CHARACTER*1 */ /* Specifies whether the upper or lower triangular part of the */ /* symmetric matrix A is stored: */ /* = 'U': Upper triangular */ /* = 'L': Lower triangular */ /* N (input) INTEGER */ /* The number of rows and columns of the matrix A. N >= 0. */ /* NRHS (input) INTEGER */ /* The number of columns of B, the matrix of right hand sides. */ /* NRHS >= 0. */ /* A (input) DOUBLE PRECISION array, dimension (LDA,N) */ /* The original M x N matrix A. */ /* LDA (input) INTEGER */ /* The leading dimension of the array A. LDA >= max(1,N). */ /* X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) */ /* The computed solution vectors for the system of linear */ /* equations. */ /* LDX (input) INTEGER */ /* The leading dimension of the array X. If TRANS = 'N', */ /* LDX >= max(1,N); if TRANS = 'T' or 'C', LDX >= max(1,N). */ /* B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) */ /* On entry, the right hand side vectors for the system of */ /* linear equations. */ /* On exit, B is overwritten with the difference B - A*X. */ /* LDB (input) INTEGER */ /* The leading dimension of the array B. IF TRANS = 'N', */ /* LDB >= max(1,M); if TRANS = 'T' or 'C', LDB >= max(1,N). */ /* RWORK (workspace) DOUBLE PRECISION array, dimension (N) */ /* RESID (output) DOUBLE PRECISION */ /* The maximum over the number of right hand sides of */ /* norm(B - A*X) / ( norm(A) * norm(X) * EPS ). */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Quick exit if N = 0 or NRHS = 0 */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; --rwork; /* Function Body */ if (*n <= 0 || *nrhs == 0) { *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if ANORM = 0. */ eps = dlamch_("Epsilon"); anorm = dlansy_("I", uplo, n, &a[a_offset], lda, &rwork[1]); if (anorm <= 0.) { *resid = 1. / eps; return 0; } /* Compute B - A*X and store in B. */ ifail = 0; dsymm_("Left", uplo, n, nrhs, &c_b5, &a[a_offset], lda, &x[x_offset], ldx, &c_b6, &b[b_offset], ldb); /* Compute the maximum over the number of right hand sides of */ /* norm(B - A*X) / ( norm(A) * norm(X) * EPS ) . */ *resid = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bnorm = (d__1 = b[idamax_(n, &b[j * b_dim1 + 1], &c__1) + j * b_dim1], abs(d__1)); xnorm = (d__1 = x[idamax_(n, &x[j * x_dim1 + 1], &c__1) + j * x_dim1], abs(d__1)); if (xnorm <= 0.) { *resid = 1. / eps; } else { /* Computing MAX */ d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps; *resid = max(d__1,d__2); } /* L10: */ } return 0; /* End of DPOT06 */ } /* dpot06_ */
/* Subroutine */ int dpot02_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *x, integer *ldx, doublereal * b, integer *ldb, doublereal *rwork, doublereal *resid) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, x_dim1, x_offset, i__1; doublereal d__1, d__2; /* Local variables */ static integer j; extern doublereal dasum_(integer *, doublereal *, integer *); static doublereal anorm, bnorm; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static doublereal xnorm; extern doublereal dlamch_(char *), dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); static doublereal eps; #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define x_ref(a_1,a_2) x[(a_2)*x_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University February 29, 1992 Purpose ======= DPOT02 computes the residual for the solution of a symmetric system of linear equations A*x = b: RESID = norm(B - A*X) / ( norm(A) * norm(X) * EPS ), where EPS is the machine epsilon. Arguments ========= UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrix A is stored: = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The number of rows and columns of the matrix A. N >= 0. NRHS (input) INTEGER The number of columns of B, the matrix of right hand sides. NRHS >= 0. A (input) DOUBLE PRECISION array, dimension (LDA,N) The original symmetric matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N) X (input) DOUBLE PRECISION array, dimension (LDX,NRHS) The computed solution vectors for the system of linear equations. LDX (input) INTEGER The leading dimension of the array X. LDX >= max(1,N). B (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS) On entry, the right hand side vectors for the system of linear equations. On exit, B is overwritten with the difference B - A*X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). RWORK (workspace) DOUBLE PRECISION array, dimension (N) RESID (output) DOUBLE PRECISION The maximum over the number of right hand sides of norm(B - A*X) / ( norm(A) * norm(X) * EPS ). ===================================================================== Quick exit if N = 0 or NRHS = 0. Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1 * 1; x -= x_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; --rwork; /* Function Body */ if (*n <= 0 || *nrhs <= 0) { *resid = 0.; return 0; } /* Exit with RESID = 1/EPS if ANORM = 0. */ eps = dlamch_("Epsilon"); anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &rwork[1]); if (anorm <= 0.) { *resid = 1. / eps; return 0; } /* Compute B - A*X */ dsymm_("Left", uplo, n, nrhs, &c_b5, &a[a_offset], lda, &x[x_offset], ldx, &c_b6, &b[b_offset], ldb); /* Compute the maximum over the number of right hand sides of norm( B - A*X ) / ( norm(A) * norm(X) * EPS ) . */ *resid = 0.; i__1 = *nrhs; for (j = 1; j <= i__1; ++j) { bnorm = dasum_(n, &b_ref(1, j), &c__1); xnorm = dasum_(n, &x_ref(1, j), &c__1); if (xnorm <= 0.) { *resid = 1. / eps; } else { /* Computing MAX */ d__1 = *resid, d__2 = bnorm / anorm / xnorm / eps; *resid = max(d__1,d__2); } /* L10: */ } return 0; /* End of DPOT02 */ } /* dpot02_ */
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int r, n_repeats; side_t side; uplo_t uplo; double dtime; double dtime_save; double gflops; bli_init(); n_repeats = 3; if( argc < 7 ) { printf("Usage:\n"); printf("test_foo.x m n k p_begin p_inc p_end:\n"); exit; } int world_size, world_rank, provided; MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided ); MPI_Comm_size( MPI_COMM_WORLD, &world_size ); MPI_Comm_rank( MPI_COMM_WORLD, &world_rank ); m_input = strtol( argv[1], NULL, 10 ); n_input = strtol( argv[2], NULL, 10 ); p_begin = strtol( argv[4], NULL, 10 ); p_inc = strtol( argv[5], NULL, 10 ); p_end = strtol( argv[6], NULL, 10 ); #if 1 dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; #else dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uplo = BLIS_LOWER; //uplo = BLIS_UPPER; for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt_a, m, m, 0, 0, &a ); else bli_obj_create( dt_a, n, n, 0, 0, &a ); bli_obj_create( dt_b, m, n, 0, 0, &b ); bli_obj_create( dt_c, m, n, 0, 0, &c ); bli_obj_create( dt_c, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, &a ); bli_obj_set_uplo( uplo, &a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( &a ); bli_obj_inc_diag_offset( 1, &a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_offset( -1, &a ); bli_obj_toggle_uplo( &a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( (1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT /* obj_t ar, ai; bli_obj_alias_to( &a, &ar ); bli_obj_alias_to( &a, &ai ); bli_obj_set_dt( BLIS_DOUBLE, &ar ); ar.rs *= 2; ar.cs *= 2; bli_obj_set_dt( BLIS_DOUBLE, &ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1; bli_printm( "ar", &ar, "%4.1f", "" ); bli_printm( "ai", &ai, "%4.1f", "" ); */ bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); bli_hemm( side, //bli_hemm4m( side, &alpha, &a, &b, &beta, &c ); #else f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( &c ); f77_int nn = bli_obj_width( &c ); f77_int lda = bli_obj_col_stride( &a ); f77_int ldb = bli_obj_col_stride( &b ); f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); double* ap = bli_obj_buffer( &a ); double* bp = bli_obj_buffer( &b ); double* betap = bli_obj_buffer( &beta ); double* cp = bli_obj_buffer( &c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt_a ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t x, y; obj_t alpha, beta; dim_t m; num_t dt_a, dt_b, dt_c; num_t dt_alpha, dt_beta; int ii; #ifdef NBLIS bli_init(); #endif m = 4000; dt_a = BLIS_DOUBLE; dt_b = BLIS_DOUBLE; dt_c = BLIS_DOUBLE; dt_alpha = BLIS_DOUBLE; dt_beta = BLIS_DOUBLE; { #ifdef NBLIS bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha ); bli_obj_create( dt_beta, 1, 1, 0, 0, &beta ); bli_obj_create( dt_a, m, 1, 0, 0, &x ); bli_obj_create( dt_a, m, 1, 0, 0, &y ); bli_obj_create( dt_a, m, m, 0, 0, &a ); bli_obj_create( dt_b, m, m, 0, 0, &b ); bli_obj_create( dt_c, m, m, 0, 0, &c ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_setsc( (2.0/1.0), 0.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); #endif #ifdef NBLAS x.buffer = malloc( m * 1 * sizeof( double ) ); y.buffer = malloc( m * 1 * sizeof( double ) ); alpha.buffer = malloc( 1 * sizeof( double ) ); beta.buffer = malloc( 1 * sizeof( double ) ); a.buffer = malloc( m * m * sizeof( double ) ); a.m = m; a.n = m; a.cs = m; b.buffer = malloc( m * m * sizeof( double ) ); b.m = m; b.n = m; b.cs = m; c.buffer = malloc( m * m * sizeof( double ) ); c.m = m; c.n = m; c.cs = m; *((double*)alpha.buffer) = 2.0; *((double*)beta.buffer) = -1.0; #endif #ifdef NBLIS #if NBLIS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { bli_gemm( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 2 { bli_hemm( BLIS_LEFT, &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 3 { bli_herk( &BLIS_ONE, &a, &BLIS_ONE, &c ); } #endif #if NBLIS >= 4 { bli_her2k( &BLIS_ONE, &a, &b, &BLIS_ONE, &c ); } #endif #if NBLIS >= 5 { bli_trmm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #if NBLIS >= 6 { bli_trsm( BLIS_LEFT, &BLIS_ONE, &a, &c ); } #endif #endif #ifdef NBLAS #if NBLAS >= 1 for ( ii = 0; ii < 2000000000; ++ii ) { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 2 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 3 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyrk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 4 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsyr2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 5 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 6 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* cp = bli_obj_buffer( c ); dtrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 7 { f77_char transa = 'N'; f77_char transb = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width_after_trans( a ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zgemm_( &transa, &transb, &mm, &nn, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 8 { f77_char side = 'L'; f77_char uplo = 'L'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &side, &uplo, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 9 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zherk_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, betap, cp, &ldc ); } #endif #if NBLAS >= 10 { f77_char uplo = 'L'; f77_char trans = 'N'; f77_int mm = bli_obj_length( c ); f77_int kk = bli_obj_width( a ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zher2k_( &uplo, &trans, &mm, &kk, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #if NBLAS >= 11 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrmm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #if NBLAS >= 12 { f77_char side = 'L'; f77_char uplo = 'L'; f77_char trans = 'N'; f77_char diag = 'N'; f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* cp = bli_obj_buffer( c ); ztrsm_( &side, &uplo, &trans, &diag, &mm, &nn, alphap, ap, &lda, cp, &ldc ); } #endif #endif #ifdef NBLIS bli_obj_free( &x ); bli_obj_free( &y ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); #endif #ifdef NBLAS free( x.buffer ); free( y.buffer ); free( alpha.buffer ); free( beta.buffer ); free( a.buffer ); free( b.buffer ); free( c.buffer ); #endif } #ifdef NBLIS bli_finalize(); #endif return 0; }
/* Subroutine */ int dsgt01_(integer *itype, char *uplo, integer *n, integer * m, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *z__, integer *ldz, doublereal *d__, doublereal *work, doublereal *result) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, z_dim1, z_offset, i__1; /* Local variables */ static integer i__; extern /* Subroutine */ int dscal_(integer *, doublereal *, doublereal *, integer *); static doublereal anorm; extern /* Subroutine */ int dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *), dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); static doublereal ulp; #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 modified August 1997, a new parameter M is added to the calling sequence. Purpose ======= DDGT01 checks a decomposition of the form A Z = B Z D or A B Z = Z D or B A Z = Z D where A is a symmetric matrix, B is symmetric positive definite, Z is orthogonal, and D is diagonal. One of the following test ratios is computed: ITYPE = 1: RESULT(1) = | A Z - B Z D | / ( |A| |Z| n ulp ) ITYPE = 2: RESULT(1) = | A B Z - Z D | / ( |A| |Z| n ulp ) ITYPE = 3: RESULT(1) = | B A Z - Z D | / ( |A| |Z| n ulp ) Arguments ========= ITYPE (input) INTEGER The form of the symmetric generalized eigenproblem. = 1: A*z = (lambda)*B*z = 2: A*B*z = (lambda)*z = 3: B*A*z = (lambda)*z UPLO (input) CHARACTER*1 Specifies whether the upper or lower triangular part of the symmetric matrices A and B is stored. = 'U': Upper triangular = 'L': Lower triangular N (input) INTEGER The order of the matrix A. N >= 0. M (input) INTEGER The number of eigenvalues found. 0 <= M <= N. A (input) DOUBLE PRECISION array, dimension (LDA, N) The original symmetric matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) DOUBLE PRECISION array, dimension (LDB, N) The original symmetric positive definite matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). Z (input) DOUBLE PRECISION array, dimension (LDZ, M) The computed eigenvectors of the generalized eigenproblem. LDZ (input) INTEGER The leading dimension of the array Z. LDZ >= max(1,N). D (input) DOUBLE PRECISION array, dimension (M) The computed eigenvalues of the generalized eigenproblem. WORK (workspace) DOUBLE PRECISION array, dimension (N*N) RESULT (output) DOUBLE PRECISION array, dimension (1) The test ratio as described above. ===================================================================== Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; z_dim1 = *ldz; z_offset = 1 + z_dim1 * 1; z__ -= z_offset; --d__; --work; --result; /* Function Body */ result[1] = 0.; if (*n <= 0) { return 0; } ulp = dlamch_("Epsilon"); /* Compute product of 1-norms of A and Z. */ anorm = dlansy_("1", uplo, n, &a[a_offset], lda, &work[1]) * dlange_("1", n, m, &z__[z_offset], ldz, &work[1]); if (anorm == 0.) { anorm = 1.; } if (*itype == 1) { /* Norm of AZ - BZD */ dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { dscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L10: */ } dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b12, &work[1], n); result[1] = dlange_("1", n, m, &work[1], n, &work[1]) / anorm / (*n * ulp); } else if (*itype == 2) { /* Norm of ABZ - ZD */ dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { dscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L20: */ } dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = dlange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } else if (*itype == 3) { /* Norm of BAZ - ZD */ dsymm_("Left", uplo, n, m, &c_b6, &a[a_offset], lda, &z__[z_offset], ldz, &c_b7, &work[1], n); i__1 = *m; for (i__ = 1; i__ <= i__1; ++i__) { dscal_(n, &d__[i__], &z___ref(1, i__), &c__1); /* L30: */ } dsymm_("Left", uplo, n, m, &c_b6, &b[b_offset], ldb, &work[1], n, & c_b12, &z__[z_offset], ldz); result[1] = dlange_("1", n, m, &z__[z_offset], ldz, &work[1]) / anorm / (*n * ulp); } return 0; /* End of DDGT01 */ } /* dsgt01_ */
void dsymm(char side, char uplo, int m, int n, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) { dsymm_( &side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); }
/* Subroutine */ int dsygst_(integer *itype, char *uplo, integer *n, doublereal *a, integer *lda, doublereal *b, integer *ldb, integer * info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DSYGST reduces a real symmetric-definite generalized eigenproblem to standard form. If ITYPE = 1, the problem is A*x = lambda*B*x, and A is overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T) If ITYPE = 2 or 3, the problem is A*B*x = lambda*x or B*A*x = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must have been previously factorized as U**T*U or L*L**T by DPOTRF. Arguments ========= ITYPE (input) INTEGER = 1: compute inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T); = 2 or 3: compute U*A*U**T or L**T*A*L. UPLO (input) CHARACTER = 'U': Upper triangle of A is stored and B is factored as U**T*U; = 'L': Lower triangle of A is stored and B is factored as L*L**T. N (input) INTEGER The order of the matrices A and B. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A, and the strictly lower triangular part of A is not referenced. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A, and the strictly upper triangular part of A is not referenced. On exit, if INFO = 0, the transformed matrix, stored in the same format as A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). B (input) DOUBLE PRECISION array, dimension (LDB,N) The triangular factor from the Cholesky factorization of B, as returned by DPOTRF. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static integer c_n1 = -1; static doublereal c_b14 = 1.; static doublereal c_b16 = -.5; static doublereal c_b19 = -1.; static doublereal c_b52 = .5; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; /* Local variables */ static integer k; extern logical lsame_(char *, char *); extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dsymm_( char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static logical upper; extern /* Subroutine */ int dtrsm_(char *, char *, char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dsygs2_( integer *, char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *); static integer kb; extern /* Subroutine */ int dsyr2k_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer nb; extern /* Subroutine */ int xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; /* Function Body */ *info = 0; upper = lsame_(uplo, "U"); if (*itype < 1 || *itype > 3) { *info = -1; } else if (! upper && ! lsame_(uplo, "L")) { *info = -2; } else if (*n < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } if (*info != 0) { i__1 = -(*info); xerbla_("DSYGST", &i__1); return 0; } /* Quick return if possible */ if (*n == 0) { return 0; } /* Determine the block size for this environment. */ nb = ilaenv_(&c__1, "DSYGST", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); if (nb <= 1 || nb >= *n) { /* Use unblocked code */ dsygs2_(itype, uplo, n, &a[a_offset], lda, &b[b_offset], ldb, info); } else { /* Use blocked code */ if (*itype == 1) { if (upper) { /* Compute inv(U')*A*inv(U) */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(k:n,k:n) */ dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; dtrsm_("Left", uplo, "Transpose", "Non-unit", &kb, & i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, k + kb), lda); i__3 = *n - k - kb + 1; dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b19, &a_ref( k, k + kb), lda, &b_ref(k, k + kb), ldb, & c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; dsymm_("Left", uplo, &kb, &i__3, &c_b16, &a_ref(k, k), lda, &b_ref(k, k + kb), ldb, &c_b14, &a_ref( k, k + kb), lda); i__3 = *n - k - kb + 1; dtrsm_("Right", uplo, "No transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k + kb, k + kb), ldb, & a_ref(k, k + kb), lda); } /* L10: */ } } else { /* Compute inv(L)*A*inv(L') */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(k:n,k:n) */ dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); if (k + kb <= *n) { i__3 = *n - k - kb + 1; dtrsm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(k + kb, k), lda); i__3 = *n - k - kb + 1; dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b19, & a_ref(k + kb, k), lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref(k + kb, k + kb), lda); i__3 = *n - k - kb + 1; dsymm_("Right", uplo, &i__3, &kb, &c_b16, &a_ref(k, k) , lda, &b_ref(k + kb, k), ldb, &c_b14, &a_ref( k + kb, k), lda); i__3 = *n - k - kb + 1; dtrsm_("Left", uplo, "No transpose", "Non-unit", & i__3, &kb, &c_b14, &b_ref(k + kb, k + kb), ldb, &a_ref(k + kb, k), lda); } /* L20: */ } } } else { if (upper) { /* Compute U*A*U' */ i__1 = *n; i__2 = nb; for (k = 1; i__2 < 0 ? k >= i__1 : k <= i__1; k += i__2) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the upper triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; dtrmm_("Left", uplo, "No transpose", "Non-unit", &i__3, & kb, &c_b14, &b[b_offset], ldb, &a_ref(1, k), lda); i__3 = k - 1; dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; dsyr2k_(uplo, "No transpose", &i__3, &kb, &c_b14, &a_ref( 1, k), lda, &b_ref(1, k), ldb, &c_b14, &a[ a_offset], lda); i__3 = k - 1; dsymm_("Right", uplo, &i__3, &kb, &c_b52, &a_ref(k, k), lda, &b_ref(1, k), ldb, &c_b14, &a_ref(1, k), lda); i__3 = k - 1; dtrmm_("Right", uplo, "Transpose", "Non-unit", &i__3, &kb, &c_b14, &b_ref(k, k), ldb, &a_ref(1, k), lda); dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L30: */ } } else { /* Compute L'*A*L */ i__2 = *n; i__1 = nb; for (k = 1; i__1 < 0 ? k >= i__2 : k <= i__2; k += i__1) { /* Computing MIN */ i__3 = *n - k + 1; kb = min(i__3,nb); /* Update the lower triangle of A(1:k+kb-1,1:k+kb-1) */ i__3 = k - 1; dtrmm_("Right", uplo, "No transpose", "Non-unit", &kb, & i__3, &c_b14, &b[b_offset], ldb, &a_ref(k, 1), lda); i__3 = k - 1; dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; dsyr2k_(uplo, "Transpose", &i__3, &kb, &c_b14, &a_ref(k, 1), lda, &b_ref(k, 1), ldb, &c_b14, &a[a_offset], lda); i__3 = k - 1; dsymm_("Left", uplo, &kb, &i__3, &c_b52, &a_ref(k, k), lda, &b_ref(k, 1), ldb, &c_b14, &a_ref(k, 1), lda); i__3 = k - 1; dtrmm_("Left", uplo, "Transpose", "Non-unit", &kb, &i__3, &c_b14, &b_ref(k, k), ldb, &a_ref(k, 1), lda); dsygs2_(itype, uplo, &kb, &a_ref(k, k), lda, &b_ref(k, k), ldb, info); /* L40: */ } } } } return 0; /* End of DSYGST */ } /* dsygst_ */
/* Right-hand-side equation for density matrix using BLAS */ int RHS_DM_BLAS(realtype t, N_Vector y, N_Vector ydot, void * data) { #ifdef DEBUGf_DM // file for density matrix coeff derivatives in time FILE * dmf; std::cout << "Creating output file for density matrix coefficient derivatives in time.\n"; dmf = fopen("dmf.out", "a"); #endif // data is a pointer to the params struct Params * p; p = (Params *) data; // extract parameters from p double * H = &(p->H)[0]; int N = p->NEQ; int N2 = p->NEQ2; // more compact notation for N_Vectors realtype * yp = N_VGetArrayPointer(y); realtype * ydotp = N_VGetArrayPointer(ydot); // update Hamiltonian if it is time-dependent if (p->torsion || p->laser_on) { // only update if at a new time point if ((t > 0.0) && (t != p->lastTime)) { updateHamiltonian(p, t); // update time point p->lastTime = t; } } // initialize ydot #pragma omp parallel for for (int ii = 0; ii < 2*N2; ii++) { ydotp[ii] = 0.0; } char LEFT = 'l'; char RGHT = 'r'; char UP = 'l'; double ONE = 1.0; double NEG = -1.0; double ZERO = 0.0; // Re(\dot{\rho}) += H*Im(\rho) dsymm_(&LEFT, &UP, &N, &N, &ONE, &H[0], &N, &yp[N2], &N, &ZERO, &ydotp[0], &N); // Re(\dot{\rho}) -= Im(\rho)*H dsymm_(&RGHT, &UP, &N, &N, &NEG, &H[0], &N, &yp[N2], &N, &ONE, &ydotp[0], &N); // Im(\dot{\rho}) += i*Re(\rho)*H dsymm_(&RGHT, &UP, &N, &N, &ONE, &H[0], &N, &yp[0], &N, &ONE, &ydotp[N2], &N); // Im(\dot{\rho}) -= i*H*Re(\rho) dsymm_(&LEFT, &UP, &N, &N, &NEG, &H[0], &N, &yp[0], &N, &ONE, &ydotp[N2], &N); #ifdef DEBUGf_DM fprintf(dmf, "%+.7e", t); for (int ii = 0; ii < N; ii++) { for (int jj = 0; jj < N; jj++) { fprintf(dmf, " (%+.2e,%+.2e)", ydotp[ii*N + jj], ydotp[ii*N + jj + N2]); } } fprintf(dmf, "\n"); std::cout << "Closing output file for density matrix coefficients in time.\n"; fclose(dmf); #endif return 0; }
int main( int argc, char** argv ) { obj_t a, b, c; obj_t c_save; obj_t alpha, beta; dim_t m, n; dim_t p; dim_t p_begin, p_end, p_inc; int m_input, n_input; num_t dt; int r, n_repeats; side_t side; uplo_t uploa; f77_char f77_side; f77_char f77_uploa; double dtime; double dtime_save; double gflops; bli_init(); //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); n_repeats = 3; #ifndef PRINT p_begin = 200; p_end = 2000; p_inc = 200; m_input = -1; n_input = -1; #else p_begin = 16; p_end = 16; p_inc = 1; m_input = 4; n_input = 4; #endif #if 1 //dt = BLIS_FLOAT; dt = BLIS_DOUBLE; #else //dt = BLIS_SCOMPLEX; dt = BLIS_DCOMPLEX; #endif side = BLIS_LEFT; //side = BLIS_RIGHT; uploa = BLIS_LOWER; //uploa = BLIS_UPPER; bli_param_map_blis_to_netlib_side( side, &f77_side ); bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); for ( p = p_begin; p <= p_end; p += p_inc ) { if ( m_input < 0 ) m = p * ( dim_t )abs(m_input); else m = ( dim_t ) m_input; if ( n_input < 0 ) n = p * ( dim_t )abs(n_input); else n = ( dim_t ) n_input; bli_obj_create( dt, 1, 1, 0, 0, &alpha ); bli_obj_create( dt, 1, 1, 0, 0, &beta ); if ( bli_is_left( side ) ) bli_obj_create( dt, m, m, 0, 0, &a ); else bli_obj_create( dt, n, n, 0, 0, &a ); bli_obj_create( dt, m, n, 0, 0, &b ); bli_obj_create( dt, m, n, 0, 0, &c ); bli_obj_create( dt, m, n, 0, 0, &c_save ); bli_randm( &a ); bli_randm( &b ); bli_randm( &c ); bli_obj_set_struc( BLIS_HERMITIAN, a ); bli_obj_set_uplo( uploa, a ); // Randomize A, make it densely Hermitian, and zero the unstored // triangle to ensure the implementation reads only from the stored // region. bli_randm( &a ); bli_mkherm( &a ); bli_mktrim( &a ); /* bli_obj_toggle_uplo( a ); bli_obj_inc_diag_off( 1, a ); bli_setm( &BLIS_ZERO, &a ); bli_obj_inc_diag_off( -1, a ); bli_obj_toggle_uplo( a ); bli_obj_set_diag( BLIS_NONUNIT_DIAG, a ); bli_scalm( &BLIS_TWO, &a ); bli_scalm( &BLIS_TWO, &a ); */ bli_setsc( (2.0/1.0), 1.0, &alpha ); bli_setsc( -(1.0/1.0), 0.0, &beta ); bli_copym( &c, &c_save ); dtime_save = 1.0e9; for ( r = 0; r < n_repeats; ++r ) { bli_copym( &c_save, &c ); dtime = bli_clock(); #ifdef PRINT bli_printm( "a", &a, "%4.1f", "" ); bli_printm( "b", &b, "%4.1f", "" ); bli_printm( "c", &c, "%4.1f", "" ); #endif #ifdef BLIS bli_hemm( side, &alpha, &a, &b, &beta, &c ); #else if ( bli_is_float( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); float* alphap = bli_obj_buffer( alpha ); float* ap = bli_obj_buffer( a ); float* bp = bli_obj_buffer( b ); float* betap = bli_obj_buffer( beta ); float* cp = bli_obj_buffer( c ); ssymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_double( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); double* alphap = bli_obj_buffer( alpha ); double* ap = bli_obj_buffer( a ); double* bp = bli_obj_buffer( b ); double* betap = bli_obj_buffer( beta ); double* cp = bli_obj_buffer( c ); dsymm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_scomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); scomplex* alphap = bli_obj_buffer( alpha ); scomplex* ap = bli_obj_buffer( a ); scomplex* bp = bli_obj_buffer( b ); scomplex* betap = bli_obj_buffer( beta ); scomplex* cp = bli_obj_buffer( c ); chemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } else if ( bli_is_dcomplex( dt ) ) { f77_int mm = bli_obj_length( c ); f77_int nn = bli_obj_width( c ); f77_int lda = bli_obj_col_stride( a ); f77_int ldb = bli_obj_col_stride( b ); f77_int ldc = bli_obj_col_stride( c ); dcomplex* alphap = bli_obj_buffer( alpha ); dcomplex* ap = bli_obj_buffer( a ); dcomplex* bp = bli_obj_buffer( b ); dcomplex* betap = bli_obj_buffer( beta ); dcomplex* cp = bli_obj_buffer( c ); zhemm_( &f77_side, &f77_uploa, &mm, &nn, alphap, ap, &lda, bp, &ldb, betap, cp, &ldc ); } #endif #ifdef PRINT bli_printm( "c after", &c, "%9.5f", "" ); exit(1); #endif dtime_save = bli_clock_min_diff( dtime_save, dtime ); } if ( bli_is_left( side ) ) gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); else gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); if ( bli_is_complex( dt ) ) gflops *= 4.0; #ifdef BLIS printf( "data_hemm_blis" ); #else printf( "data_hemm_%s", BLAS ); #endif printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n", ( unsigned long )(p - p_begin + 1)/p_inc + 1, ( unsigned long )m, ( unsigned long )n, dtime_save, gflops ); bli_obj_free( &alpha ); bli_obj_free( &beta ); bli_obj_free( &a ); bli_obj_free( &b ); bli_obj_free( &c ); bli_obj_free( &c_save ); } bli_finalize(); return 0; }
/* Subroutine */ int dsposv_(char *uplo, integer *n, integer *nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal * x, integer *ldx, doublereal *work, real *swork, integer *iter, integer *info) { /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, work_dim1, work_offset, x_dim1, x_offset, i__1; doublereal d__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ integer i__; doublereal cte, eps, anrm; integer ptsa; doublereal rnrm, xnrm; integer ptsx; extern logical lsame_(char *, char *); integer iiter; extern /* Subroutine */ int daxpy_(integer *, doublereal *, doublereal *, integer *, doublereal *, integer *), dsymm_(char *, char *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *), dlag2s_(integer *, integer *, doublereal *, integer *, real *, integer *, integer *), slag2d_(integer *, integer *, real *, integer *, doublereal *, integer *, integer *), dlat2s_(char *, integer *, doublereal *, integer *, real *, integer *, integer *); extern doublereal dlamch_(char *); extern integer idamax_(integer *, doublereal *, integer *); extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *); extern doublereal dlansy_(char *, char *, integer *, doublereal *, integer *, doublereal *); extern /* Subroutine */ int dpotrf_(char *, integer *, doublereal *, integer *, integer *), dpotrs_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *, integer *), spotrf_(char *, integer *, real *, integer *, integer *), spotrs_(char *, integer *, integer *, real *, integer *, real *, integer *, integer *); /* -- LAPACK driver routine (version 3.4.0) -- */ /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ /* November 2011 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. Local Scalars .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Parameter adjustments */ work_dim1 = *n; work_offset = 1 + work_dim1; work -= work_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; x_dim1 = *ldx; x_offset = 1 + x_dim1; x -= x_offset; --swork; /* Function Body */ *info = 0; *iter = 0; /* Test the input parameters. */ if (! lsame_(uplo, "U") && ! lsame_(uplo, "L")) { *info = -1; } else if (*n < 0) { *info = -2; } else if (*nrhs < 0) { *info = -3; } else if (*lda < max(1,*n)) { *info = -5; } else if (*ldb < max(1,*n)) { *info = -7; } else if (*ldx < max(1,*n)) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("DSPOSV", &i__1); return 0; } /* Quick return if (N.EQ.0). */ if (*n == 0) { return 0; } /* Skip single precision iterative refinement if a priori slower */ /* than double precision factorization. */ if (FALSE_) { *iter = -1; goto L40; } /* Compute some constants. */ anrm = dlansy_("I", uplo, n, &a[a_offset], lda, &work[work_offset]); eps = dlamch_("Epsilon"); cte = anrm * eps * sqrt((doublereal) (*n)) * 1.; /* Set the indices PTSA, PTSX for referencing SA and SX in SWORK. */ ptsa = 1; ptsx = ptsa + *n * *n; /* Convert B from double precision to single precision and store the */ /* result in SX. */ dlag2s_(n, nrhs, &b[b_offset], ldb, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Convert A from double precision to single precision and store the */ /* result in SA. */ dlat2s_(uplo, n, &a[a_offset], lda, &swork[ptsa], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Compute the Cholesky factorization of SA. */ spotrf_(uplo, n, &swork[ptsa], n, info); if (*info != 0) { *iter = -3; goto L40; } /* Solve the system SA*SX = SB. */ spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); /* Convert SX back to double precision */ slag2d_(n, nrhs, &swork[ptsx], n, &x[x_offset], ldx, info); /* Compute R = B - AX (R is WORK). */ dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); dsymm_("Left", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], f2c_abs(d__1)); rnrm = (d__1 = work[idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], f2c_abs(d__1)); if (rnrm > xnrm * cte) { goto L10; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion. We are good to exit. */ *iter = 0; return 0; L10: for (iiter = 1; iiter <= 30; ++iiter) { /* Convert R (in WORK) from double precision to single precision */ /* and store the result in SX. */ dlag2s_(n, nrhs, &work[work_offset], n, &swork[ptsx], n, info); if (*info != 0) { *iter = -2; goto L40; } /* Solve the system SA*SX = SR. */ spotrs_(uplo, n, nrhs, &swork[ptsa], n, &swork[ptsx], n, info); /* Convert SX back to double precision and update the current */ /* iterate. */ slag2d_(n, nrhs, &swork[ptsx], n, &work[work_offset], n, info); i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { daxpy_(n, &c_b11, &work[i__ * work_dim1 + 1], &c__1, &x[i__ * x_dim1 + 1], &c__1); } /* Compute R = B - AX (R is WORK). */ dlacpy_("All", n, nrhs, &b[b_offset], ldb, &work[work_offset], n); dsymm_("L", uplo, n, nrhs, &c_b10, &a[a_offset], lda, &x[x_offset], ldx, &c_b11, &work[work_offset], n); /* Check whether the NRHS normwise backward errors satisfy the */ /* stopping criterion. If yes, set ITER=IITER>0 and return. */ i__1 = *nrhs; for (i__ = 1; i__ <= i__1; ++i__) { xnrm = (d__1 = x[idamax_(n, &x[i__ * x_dim1 + 1], &c__1) + i__ * x_dim1], f2c_abs(d__1)); rnrm = (d__1 = work[idamax_(n, &work[i__ * work_dim1 + 1], &c__1) + i__ * work_dim1], f2c_abs(d__1)); if (rnrm > xnrm * cte) { goto L20; } } /* If we are here, the NRHS normwise backward errors satisfy the */ /* stopping criterion, we are good to exit. */ *iter = iiter; return 0; L20: /* L30: */ ; } /* If we are at this place of the code, this is because we have */ /* performed ITER=ITERMAX iterations and never satisified the */ /* stopping criterion, set up the ITER flag accordingly and follow */ /* up on double precision routine. */ *iter = -31; L40: /* Single-precision iterative refinement failed to converge to a */ /* satisfactory solution, so we resort to double precision. */ dpotrf_(uplo, n, &a[a_offset], lda, info); if (*info != 0) { return 0; } dlacpy_("All", n, nrhs, &b[b_offset], ldb, &x[x_offset], ldx); dpotrs_(uplo, n, nrhs, &a[a_offset], lda, &x[x_offset], ldx, info); return 0; /* End of DSPOSV. */ }