/*
 * Convenience wrapper around the Fortran-style routine dgtsv_.
 *
 * All scalar arguments are taken by value and handed to dgtsv_ by address.
 *
 *   dim   - first argument forwarded to dgtsv_ (system size)
 *   nRHS  - second argument forwarded to dgtsv_ (number of right-hand sides)
 *   dl, d, du - the three band arrays, forwarded unchanged
 *   b     - right-hand side / solution array, forwarded unchanged
 *   ldb   - leading dimension of b
 *
 * Nothing is returned; when dgtsv_ reports a non-zero status the value is
 * printed on stdout.
 */
void lapack_dgtsv(long dim, long nRHS, double *dl, double *d, double *du, double *b, long ldb)
{
    long status;

    dgtsv_(&dim, &nRHS, dl, d, du, b, &ldb, &status);

    /* Success: stay silent. */
    if (status == 0)
        return;

    printf("dgtsv = %li\n", status);
}
/*
 * By-value front end for the Fortran-style routine dgtsv_.
 *
 * The scalar arguments N, NRHS and LDB are passed to dgtsv_ by address;
 * the array pointers DL, D, DU and B are forwarded untouched.
 *
 * Returns the status value that dgtsv_ stores through its last argument.
 */
static long dgtsv(long N, long NRHS, double *DL, double *D, double *DU, double *B, long LDB)
{
    extern void dgtsv_(const long *n, const long *nrhs,
                       double *dl, double *d, double *du, double *b,
                       const long *ldb, long *info);
    long status;

    dgtsv_(&N, &NRHS, DL, D, DU, B, &LDB, &status);

    return status;
}
int main (int argc, char* argv []) { mpi::environment env (argc, argv); mpi::communicator world; TestMatrixInitializer init; std::vector<AbstractMatrixInitializer*> mat_inits(10, & init); VacuumCouplingInitializer c_init(& init, 10, world); std::vector<AbstractCouplingInitializer*> c_inits(10, & c_init); ThreeScatterRHSCollection crc(mat_inits, c_inits, 10, world); double test_rhs_storage[100]; double* test_rhs[10]; std::fill_n(test_rhs_storage, 100, 0); for(int i = 0; i < 10; i++) { test_rhs_storage[i*10 + 0] = 1; // test_rhs_storage[i*9 + 8] = 1; test_rhs[i] = & test_rhs_storage[i*10]; } world.barrier(); crc.doLines(test_rhs); int dummy; std::ios::openmode om = (world.rank() == 0) ? std::ios::out : std::ios::app; for(unsigned int il=0; il < 10; il++) { std::ofstream dump("tdtest.txt", om); if (world.rank() != 0) world.recv(world.rank()-1, 0, dummy); for(int i=0; i < 10; i++) { dump << test_rhs[il][i] << " "; } dump.flush(); if (world.rank()+1 < world.size()) { world.send(world.rank()+1, 0, dummy); } if(world.rank() == 4) { dump << std::endl << std::endl; world.send(0, 0, dummy); } if(world.rank() == 0) world.recv(4, 0, dummy); om = std::ios::app; } if(world.rank() == 4) { std::ofstream dump("tdtest.txt", std::ios::app); double herp[50]; double d[50]; double ld[49]; double ud[49]; std::fill_n(herp, 50, 0); std::fill_n(d, 50, 1); std::fill_n(ld, 49, -1.0/3.0); std::fill_n(ud, 49, -1.0/3.0); for(unsigned int i=0; i < 50; i += 10) {herp[i] = 1; // herp[i+8] = 1; } dump << "serial solve: " << std::endl; int ninety=50; int one=1; int info; dgtsv_(& ninety, & one, ud, d, ld, herp, & ninety, & info); for(int i = 0; i < 50; i++) dump << herp[i] << " "; dump << std::endl; } }
/* Real tridiagonal solver * * Returns true on success */ bool tridag(const real *a, const real *b, const real *c, const real *r, real *u, int n) { int nrhs = 1; int info; // Lapack routines overwrite their inputs, so need to copy static int len = 0; static real *dl, *d, *du, *x; if(n > len) { // Allocate more memory (as a single block) if(len > 0) delete[] dl; dl = new real[4*n]; d = dl + n; du = d + n; x = du + n; len = n; } for(int i=0;i<n;i++) { // Diagonal d[i] = b[i]; // Off-diagonal terms if(i != (n-1)) { dl[i] = a[i+1]; du[i] = c[i]; } x[i] = r[i]; } /* LAPACK DGTSV routine. n - Size of the array nrhs - Number of RHS vectors to solve dl - lower band d - diagonal values du - upper band x - input and output values info - output status */ dgtsv_(&n, &nrhs, dl, d, du, x, &n, &info); if(info != 0) { // Some sort of problem output.write("Problem in LAPACK DGTSV routine\n"); return false; } // Copy result back for(int i=0;i<n;i++) { u[i] = x[i]; } return true; }
/* dtimgt_ -- machine-translated (f2c-style) C version of the LAPACK timing
   routine DTIMGT, which times DGTTRF, DGTTRS, DGTSV and DGTSL.

   Outline of what the body does:
     1. atimin_ parses the request in `line` and fills timsub[0..3], one
        flag per routine named in subnam; a non-zero info skips everything.
     2. atimck_ checks the M / LDA values for routines 2..4; a routine that
        fails the check is reported on unit *nout and switched off.
     3. For every matrix size in mval (and, where applicable, every LDA and
        NRHS value) each enabled routine is run repeatedly until at least
        *timmin has elapsed, the time spent regenerating the test data with
        dtimmg_ is measured separately and subtracted, and the rate returned
        by dmflop_ is stored in reslts.
     4. dprtbl_ prints one table per enabled routine.

   Fortran conventions kept by the translation: every argument is passed by
   address, array pointers are shifted ("Parameter adjustments") so they can
   be indexed from 1, and all locals are static.  The function always
   returns 0.  The c__N constants, max() and FALSE_ used below are defined
   outside this excerpt. */
/* Subroutine */ int dtimgt_(char *line, integer *nm, integer *mval, integer * nns, integer *nsval, integer *nlda, integer *ldaval, doublereal * timmin, doublereal *a, doublereal *b, integer *iwork, doublereal * reslts, integer *ldr1, integer *ldr2, integer *ldr3, integer *nout, ftnlen line_len) { /* Initialized data */ static char subnam[6*4] = "DGTTRF" "DGTTRS" "DGTSV " "DGTSL "; static char transs[1*2] = "N" "T"; /* Format strings */ static char fmt_9998[] = "(1x,a6,\002 timing run not attempted\002,/)"; static char fmt_9997[] = "(/\002 *** Speed of \002,a6,\002 in megaflops " "***\002)"; static char fmt_9996[] = "(5x,\002line \002,i2,\002 with LDA = \002,i5)"; static char fmt_9999[] = "(\002 DGTTRS with TRANS = '\002,a1,\002'\002,/)" ; /* System generated locals */ integer reslts_dim1, reslts_dim2, reslts_dim3, reslts_offset, i__1, i__2, i__3, i__4; /* Builtin functions Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void), s_wsle(cilist *), e_wsle(void); /* Local variables */ static integer ilda, info; static char path[3]; static doublereal time; static integer isub, nrhs, i__, m, n; static char cname[6]; extern doublereal dopgb_(char *, integer *, integer *, integer *, integer *, integer *); static integer laval[1], itran; extern /* Subroutine */ int dgtsl_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); static char trans[1]; extern /* Subroutine */ int dgtsv_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal s1, s2; static integer ic, im; extern doublereal dsecnd_(void); extern /* Subroutine */ int atimck_(integer *, char *, integer *, integer *, integer *, integer *, integer *, integer *, ftnlen); extern doublereal dmflop_(doublereal *, doublereal *, integer *); extern /* Subroutine */ int atimin_(char *, char *, integer *, char *, logical *, integer *, integer *, ftnlen, ftnlen, 
ftnlen), dtimmg_( integer *, integer *, integer *, doublereal *, integer *, integer *, integer *), dprtbl_(char *, char *, integer *, integer *, integer *, integer *, integer *, doublereal *, integer *, integer *, integer *, ftnlen, ftnlen), dgttrf_(integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, integer *); static doublereal untime; static logical timsub[4]; extern /* Subroutine */ int dgttrs_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *); static integer ldb, icl; static doublereal ops; /* Fortran I/O blocks */ static cilist io___8 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___25 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___26 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___27 = { 0, 0, 0, 0, 0 }; static cilist io___29 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___30 = { 0, 0, 0, fmt_9999, 0 }; #define subnam_ref(a_0,a_1) &subnam[(a_1)*6 + a_0 - 6] #define reslts_ref(a_1,a_2,a_3,a_4) reslts[(((a_4)*reslts_dim3 + (a_3))*\ reslts_dim2 + (a_2))*reslts_dim1 + a_1] /* -- LAPACK timing routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= DTIMGT times DGTTRF, -TRS, -SV, and -SL. Arguments ========= LINE (input) CHARACTER*80 The input line that requested this routine. The first six characters contain either the name of a subroutine or a generic path name. The remaining characters may be used to specify the individual routines to be timed. See ATIMIN for a full description of the format of the input line. NM (input) INTEGER The number of values of M contained in the vector MVAL. MVAL (input) INTEGER array, dimension (NM) The values of the matrix size M. NNS (input) INTEGER The number of values of NRHS contained in the vector NSVAL. NSVAL (input) INTEGER array, dimension (NNS) The values of the number of right hand sides NRHS. 
NLDA (input) INTEGER The number of values of LDA contained in the vector LDAVAL. LDAVAL (input) INTEGER array, dimension (NLDA) The values of the leading dimension of the array A. TIMMIN (input) DOUBLE PRECISION The minimum time a subroutine will be timed. A (workspace) DOUBLE PRECISION array, dimension (NMAX*4) where NMAX is the maximum value permitted for N. B (workspace) DOUBLE PRECISION array, dimension (LDAMAX*NMAX) IWORK (workspace) INTEGER array, dimension (NMAX) RESLTS (output) DOUBLE PRECISION array, dimension (LDR1,LDR2,LDR3,NSUBS+1) The timing results for each subroutine over the relevant values of N. LDR1 (input) INTEGER The first dimension of RESLTS. LDR1 >= 1. LDR2 (input) INTEGER The second dimension of RESLTS. LDR2 >= max(1,NM). LDR3 (input) INTEGER The third dimension of RESLTS. LDR3 >= max(1,NLDA). NOUT (input) INTEGER The unit number for output. ===================================================================== Parameter adjustments */ --mval; --nsval; --ldaval; --a; --b; --iwork; reslts_dim1 = *ldr1; reslts_dim2 = *ldr2; reslts_dim3 = *ldr3; reslts_offset = 1 + reslts_dim1 * (1 + reslts_dim2 * (1 + reslts_dim3 * 1) ); reslts -= reslts_offset; /* Function Body Extract the timing request from the input line. */ s_copy(path, "Double precision", (ftnlen)1, (ftnlen)16); s_copy(path + 1, "GT", (ftnlen)2, (ftnlen)2); atimin_(path, line, &c__4, subnam, timsub, nout, &info, (ftnlen)3, ( ftnlen)80, (ftnlen)6); if (info != 0) { goto L180; } /* Check that N <= LDA for the input values. */ for (isub = 2; isub <= 4; ++isub) { if (! 
timsub[isub - 1]) { goto L10; } s_copy(cname, subnam_ref(0, isub), (ftnlen)6, (ftnlen)6); atimck_(&c__2, cname, nm, &mval[1], nlda, &ldaval[1], nout, &info, ( ftnlen)6); if (info > 0) { io___8.ciunit = *nout; s_wsfe(&io___8); do_fio(&c__1, cname, (ftnlen)6); e_wsfe(); timsub[isub - 1] = FALSE_; } L10: ; } /* Do for each value of M: */ i__1 = *nm; for (im = 1; im <= i__1; ++im) { m = mval[im]; n = max(m,1); /* Time DGTTRF */ if (timsub[0]) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L20: dgttrf_(&m, &a[1], &a[n], &a[n * 2], &a[n * 3 - 2], &iwork[1], & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); goto L20; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L30: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); goto L30; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTTRF", &m, &m, &c__1, &c__1, &iwork[1]) ; reslts_ref(1, im, 1, 1) = dmflop_(&ops, &time, &info); } else if (timsub[1]) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); } /* Generate another matrix and factor it using DGTTRF so that the factored form can be used in timing the other routines. 
*/ if (ic != 1) { dgttrf_(&m, &a[1], &a[n], &a[n * 2], &a[n * 3 - 2], &iwork[1], & info); } /* Time DGTTRS */ if (timsub[1]) { for (itran = 1; itran <= 2; ++itran) { *(unsigned char *)trans = *(unsigned char *)&transs[itran - 1] ; i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { ldb = ldaval[ilda]; i__3 = *nns; for (i__ = 1; i__ <= i__3; ++i__) { nrhs = nsval[i__]; dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L40: dgttrs_(trans, &m, &nrhs, &a[1], &a[n], &a[n * 2], &a[ n * 3 - 2], &iwork[1], &b[1], &ldb, &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L40; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L50: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, & c__0); goto L50; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTTRS", &m, &nrhs, &c__0, &c__0, & iwork[1]); if (itran == 1) { reslts_ref(i__, im, ilda, 2) = dmflop_(&ops, & time, &info); } else { reslts_ref(i__, im, ilda, 5) = dmflop_(&ops, & time, &info); } /* L60: */ } /* L70: */ } /* L80: */ } } /* Time DGTSV */ if (timsub[2]) { i__2 = *nlda; for (ilda = 1; ilda <= i__2; ++ilda) { ldb = ldaval[ilda]; i__3 = *nns; for (i__ = 1; i__ <= i__3; ++i__) { nrhs = nsval[i__]; i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L90: dgtsv_(&m, &nrhs, &a[1], &a[n], &a[n * 2], &b[1], &ldb, & info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L90; } /* Subtract the time used in DTIMMG. 
*/ icl = 1; s1 = dsecnd_(); L100: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__4 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__4, &c__0, &c__0); dtimmg_(&c__0, &m, &nrhs, &b[1], &ldb, &c__0, &c__0); goto L100; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTSV ", &m, &nrhs, &c__0, &c__0, &iwork[1]); reslts_ref(i__, im, ilda, 3) = dmflop_(&ops, &time, &info) ; /* L110: */ } /* L120: */ } } /* Time DGTSL */ if (timsub[3]) { /* NOTE(review): the first right-hand-side generation below passes &n as the leading dimension, while the regeneration calls in the L130 and L140 loops pass &ldb, which holds whatever an earlier section last stored in it -- confirm this is intended. */ i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &n, &c__0, &c__0); ic = 0; s1 = dsecnd_(); L130: dgtsl_(&m, &a[1], &a[n], &a[n * 2], &b[1], &info); s2 = dsecnd_(); time = s2 - s1; ++ic; if (time < *timmin) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &ldb, &c__0, &c__0); goto L130; } /* Subtract the time used in DTIMMG. */ icl = 1; s1 = dsecnd_(); L140: s2 = dsecnd_(); untime = s2 - s1; ++icl; if (icl <= ic) { i__2 = n * 3; dtimmg_(&c__12, &m, &m, &a[1], &i__2, &c__0, &c__0); dtimmg_(&c__0, &m, &c__1, &b[1], &ldb, &c__0, &c__0); goto L140; } time = (time - untime) / (doublereal) ic; ops = dopgb_("DGTSV ", &m, &c__1, &c__0, &c__0, &iwork[1]); reslts_ref(1, im, 1, 4) = dmflop_(&ops, &time, &info); } /* L150: */ } /* Print a table of results for each timed routine. */ for (isub = 1; isub <= 4; ++isub) { if (! 
timsub[isub - 1]) { goto L170; } io___25.ciunit = *nout; s_wsfe(&io___25); do_fio(&c__1, subnam_ref(0, isub), (ftnlen)6); e_wsfe(); if (*nlda > 1 && (timsub[1] || timsub[2])) { i__1 = *nlda; for (i__ = 1; i__ <= i__1; ++i__) { io___26.ciunit = *nout; s_wsfe(&io___26); do_fio(&c__1, (char *)&i__, (ftnlen)sizeof(integer)); do_fio(&c__1, (char *)&ldaval[i__], (ftnlen)sizeof(integer)); e_wsfe(); /* L160: */ } } io___27.ciunit = *nout; s_wsle(&io___27); e_wsle(); if (isub == 1) { dprtbl_(" ", "N", &c__1, laval, nm, &mval[1], &c__1, &reslts[ reslts_offset], ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } else if (isub == 2) { io___29.ciunit = *nout; s_wsfe(&io___29); do_fio(&c__1, "N", (ftnlen)1); e_wsfe(); dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 2), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); io___30.ciunit = *nout; s_wsfe(&io___30); do_fio(&c__1, "T", (ftnlen)1); e_wsfe(); dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 5), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); } else if (isub == 3) { dprtbl_("NRHS", "N", nns, &nsval[1], nm, &mval[1], nlda, & reslts_ref(1, 1, 1, 3), ldr1, ldr2, nout, (ftnlen)4, ( ftnlen)1); } else if (isub == 4) { dprtbl_(" ", "N", &c__1, laval, nm, &mval[1], &c__1, &reslts_ref( 1, 1, 1, 4), ldr1, ldr2, nout, (ftnlen)1, (ftnlen)1); } L170: ; } L180: return 0; /* End of DTIMGT */ } /* dtimgt_ */
void SingleWell1DLinear::LinearMatSolve() { int indxOff = SolveNodeOffset(); for (int i = 0; i < nequations; i++) fracRhs[i] = fracSSolve[i] * nodePressure[i + indxOff]; if (control.IsDual()) AddSCRhsTerm(matrix); if (control.IsLeaky()) { if (leakageType == ltUpperLower) { AddSCRhsTerm(upperLeak); AddSCRhsTerm(lowerLeak); } else AddSCRhsTerm(singleLeak); } fracRhs[nequations - 1] -= externalBoundaryTerm; if (currentSeqIsFixed) fracRhs[0] += currSeqTZ.tzPressure * wellDTerm; else fracRhs[0] += GetWellBCRhs(); #ifdef LAPACK //for LAPACK, just use the simple tri-diagonal solver. int N = nequations; int nrhs = 1; double *DL = new double[N-1]; double *D = new double[N]; double *DU = new double[N-1]; double *B = new double[N]; for(int i = 0; i < N-1; i++) { DL[i]=fracSolveUpper[i]; DU[i]=fracSolveUpper[i]; D[i]=fracSolveDiag[i]; B[i]=fracRhs[i]; } D[N-1]=fracSolveDiag[N-1]; B[N-1]=fracRhs[N-1]; int ldb = N; int info; dgtsv_(&N, &nrhs, DL, D, DU, B, &ldb, &info); if(info == 0) { for(int i = 0; i < N; i++) { nodePressure[i]=B[i]; } } else //info != 0 { throw SimError("SingleWell1DLinear::LinearMatSolve - failed tri-diagonal solve", SimError::seSemiFatal); } delete [] DL; delete [] DU; delete [] D; delete [] B; #else // LAPACK ThomasSolve(fracSolveDiag, fracSolveUpper, fracWork, fracRhs, nodePressure, nequations); #endif // LAPACK if (currentSeqIsFixed) { for (int i = nequations; i > 0; i--) nodePressure[i] = nodePressure[i - 1]; nodePressure[0] = currSeqTZ.tzPressure; } if (control.IsDual()) SCGaussSolve(matrix); if (control.IsLeaky()) { if (leakageType == ltUpperLower) { SCGaussSolve(upperLeak); SCGaussSolve(lowerLeak); } else SCGaussSolve(singleLeak); } }
/// Assemble and solve the tridiagonal system for one column of cells.
///
/// @param column_cells  cell indices of the column; consecutive entries are
///                      treated as neighbours
/// @param dt            time step, forwarded to model_.fluxConnection
/// @param s             state vector wrapped (by reference) in StateWithZeroFlux
/// @param sol_vec       receives, for every cell of the column, the negated
///                      solution of the tridiagonal system
///
/// An empty column is a no-op; a single-cell column gets the value 0.
/// Throws (via THROW) when dgtsv_ reports a non-zero info.
void GravityColumnSolver<Model>::solveSingleColumn(const std::vector<int>& column_cells,
                                                   const double dt,
                                                   std::vector<double>& s,
                                                   std::vector<double>& sol_vec)
{
    // This is written only to work with SinglePointUpwindTwoPhase,
    // not with arbitrary problem models.
    const int col_size = column_cells.size();
    if (col_size == 0) {
        // Nothing to solve. Without this guard the band storage below would
        // be sized with 3*0 - 2, i.e. a negative value converted to a huge
        // unsigned size.
        return;
    }
    if (col_size == 1) {
        sol_vec[column_cells[0]] = 0.0;
        return;
    }
    StateWithZeroFlux state(s); // This holds s by reference.

    // Assemble.
    // Band storage layout: [ DU (col_size-1) | D (col_size) | DL (col_size-1) ].
    std::vector<double> tridiag_matrix_data(3*col_size - 2, 0.0);
    double* DU = &tridiag_matrix_data[0];
    double* D = DU + col_size - 1;
    double* DL = D + col_size;
    std::vector<double> rhs(col_size, 0.0);
    for (int ci = 0; ci < col_size; ++ci) {
        double rescontrib, j1contrib, j2contrib;
        const int cell = column_cells[ci];
        // -999 is a sentinel meaning "no neighbour on this side".
        const int prev_cell = (ci == 0) ? -999 : column_cells[ci - 1];
        const int next_cell = (ci == col_size - 1) ? -999 : column_cells[ci + 1];
        // model_.initResidual(cell, F);
        for (int j = grid_.cell_facepos[cell]; j < grid_.cell_facepos[cell+1]; ++j) {
            const int face = grid_.cell_faces[j];
            const int c1 = grid_.face_cells[2*face + 0];
            const int c2 = grid_.face_cells[2*face + 1];
            // Only faces shared with the previous or next cell of the column contribute.
            if (c1 == prev_cell || c2 == prev_cell || c1 == next_cell || c2 == next_cell) {
                j1contrib = j2contrib = rescontrib = 0.0;
                model_.fluxConnection(state, grid_, dt, cell, face, &j1contrib, &j2contrib, &rescontrib);
                if (c1 == prev_cell || c2 == prev_cell) {
                    DL[ci-1] += j2contrib;
                } else {
                    ASSERT(c1 == next_cell || c2 == next_cell);
                    DU[ci] += j2contrib;
                }
                D[ci] += j1contrib;
                rhs[ci] += rescontrib;
            }
        }
        j1contrib = rescontrib = 0.0;
        model_.accumulation(grid_, cell, &j1contrib, &rescontrib);
        D[ci] += j1contrib;
        rhs[ci] += rescontrib;
    }
    // model_.sourceTerms(); // Not needed

    // Solve.
    const MAT_SIZE_T num_rhs = 1, colSize = col_size;
    MAT_SIZE_T info = 0;
    // Solution will be written to rhs.
    dgtsv_(&colSize, &num_rhs, DL, D, DU, &rhs[0], &colSize, &info);
    if (info != 0) {
        THROW("Lapack reported error in dgtsv: " << info);
    }
    for (int ci = 0; ci < col_size; ++ci) {
        sol_vec[column_cells[ci]] = -rhs[ci];
    }
}