// Solves the equation system by Gaussian elimination with row pivoting
// followed by back substitution.  The matrix A and the right hand side B
// are overwritten; the result is stored in X.  Entries below the diagonal
// are never read again and therefore not explicitly zeroed.
void eqnsys<nr_type_t>::solve_gauss (void) {
  // elimination phase: handle one pivot column after the other
  for (int k = 0; k < N; ++k) {
    // look for the entry with the largest magnitude in column k,
    // considering the diagonal element and everything below it
    nr_double_t MaxPivot = 0;
    int pivotRow = k;
    for (int row = k; row < N; ++row) {
      if (abs (A_(row, k)) > MaxPivot) {
        MaxPivot = abs (A_(row, k));
        pivotRow = row;
      }
    }

    // an all-zero pivot column means the matrix is singular
    assert (MaxPivot != 0);

    // move the pivot row onto the diagonal
    if (pivotRow != k) {
      A->exchangeRows (k, pivotRow);
      B->exchangeRows (k, pivotRow);
    }

    // subtract multiples of the pivot row from all rows below it
    for (int row = k + 1; row < N; ++row) {
      const nr_type_t factor = A_(row, k) / A_(k, k);
      for (int col = k + 1; col < N; ++col) {
        A_(row, col) -= factor * A_(k, col);
      }
      B_(row) -= factor * B_(k);
    }
  }

  // back substitution on the upper triangle, last row first
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = B_(row);
    for (int col = row + 1; col < N; ++col) {
      acc -= A_(row, col) * X_(col);
    }
    X_(row) = acc / A_(row, row);
  }
}
// Substitution step for the least-squares variant of the Householder QR
// solver.  B is used as work space and the permuted result ends up in X.
void eqnsys<nr_type_t>::substitute_qr_householder_ls (void) {
  // stage 1: forward substitution solving R'X = B (A is read transposed)
  for (int row = 0; row < N; ++row) {
    nr_type_t acc = B_(row);
    for (int col = 0; col < row; ++col) {
      acc -= A_(col, row) * B_(col);
    }
    // diagonal entries not exceeding machine epsilon yield a zero component
    if (abs (A_(row, row)) > std::numeric_limits<nr_double_t>::epsilon()) {
      B_(row) = acc / A_(row, row);
    }
    else {
      B_(row) = 0;
    }
  }

  // stage 2: build the least square solution QX, last column first
  for (int col = N - 1; col >= 0; --col) {
    if (T_(col) != 0) {
      // scalar product u' * B; the leading element contributes B_(col) itself
      nr_type_t dot = B_(col);
      for (int row = col + 1; row < N; ++row) {
        dot += cond_conj (A_(row, col)) * B_(row);
      }
      // z - T * f * u_k
      dot *= T_(col);
      B_(col) -= dot;
      for (int row = col + 1; row < N; ++row) {
        B_(row) -= dot * A_(row, col);
      }
    }
  }

  // stage 3: undo the column permutation while copying into X
  for (int row = 0; row < N; ++row) {
    X_(cMap[row]) = B_(row);
  }
}
static void evaluate(const Shell &A, const Shell &B, const Shell &C, const Shell &D, double *Q) { adapter::rysq::Shell A_(A), B_(B), C_(C), D_(D); //rysq::Quartet<rysq::Shell> quartet(A_, B_, C_, D_); boost::array<Center,4> centers = {{ A.center(), B.center(), C.center(), D.center() }}; rysq::Eri eri(rysq::Quartet<rysq::Shell>(A_, B_, C_, D_)); eri(centers, Q); }
// Solves the equation system with the Gauss-Jordan method using row
// pivoting.  B is reduced in place until it holds the solution, which is
// finally copied into X.  A and B are both overwritten.
void eqnsys<nr_type_t>::solve_gauss_jordan (void) {
  for (int k = 0; k < N; ++k) {
    // choose the entry with the largest magnitude in column k (row k and
    // below) as pivot
    nr_double_t MaxPivot = 0;
    int pivotRow = k;
    for (int row = k; row < N; ++row) {
      if (abs (A_(row, k)) > MaxPivot) {
        MaxPivot = abs (A_(row, k));
        pivotRow = row;
      }
    }

    // no usable pivot: the matrix is singular
    assert (MaxPivot != 0);

    // bring the pivot row into place
    if (pivotRow != k) {
      A->exchangeRows (k, pivotRow);
      B->exchangeRows (k, pivotRow);
    }

    // scale the pivot row; only the columns right of the diagonal are
    // touched because the others are not read again
    const nr_type_t diag = A_(k, k);
    for (int col = k + 1; col < N; ++col) {
      A_(k, col) /= diag;
    }
    B_(k) /= diag;

    // remove column k from every other row, above and below the pivot
    for (int row = 0; row < N; ++row) {
      if (row == k) {
        continue;
      }
      const nr_type_t factor = A_(row, k);
      for (int col = k + 1; col < N; ++col) {
        A_(row, col) -= factor * A_(k, col);
      }
      B_(row) -= factor * B_(k);
    }
  }

  // the right hand side has become the solution
  *X = *B;
}
// Substitution step of the QR (Householder) solver: first forms the new
// right hand side Q'B in place, then solves RX = Q'B by back substitution.
// The diagonal of R is read through R_() and the column permutation
// through cMap.
void eqnsys<nr_type_t>::substitute_qrh (void) {
  // apply the reflections stored in the first N - 1 columns of A to B
  for (int col = 0; col < N - 1; ++col) {
    // scalar product u_k^T * B
    nr_type_t dot;
    dot = 0;
    for (int row = col; row < N; ++row) {
      dot += cond_conj (A_(row, col)) * B_(row);
    }
    // z - 2 * f * u_k
    for (int row = col; row < N; ++row) {
      B_(row) -= 2.0 * dot * A_(row, col);
    }
  }

  // back substitution, last row first
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = B_(row);
    for (int col = row + 1; col < N; ++col) {
      acc -= A_(row, col) * X_(cMap[col]);
    }
    // diagonal entries not exceeding machine epsilon yield a zero component
    if (abs (R_(row)) > std::numeric_limits<nr_double_t>::epsilon()) {
      X_(cMap[row]) = acc / R_(row);
    }
    else {
      X_(cMap[row]) = 0;
    }
  }
}
// Substitution step of the Householder QR solver: forms the new right hand
// side Q'B in place and then solves RX = Q'B by back substitution, writing
// the solution through the column permutation cMap.
void eqnsys<nr_type_t>::substitute_qr_householder (void) {
  // apply every reflection with a non-zero T entry to B
  for (int col = 0; col < N; ++col) {
    if (T_(col) != 0) {
      // scalar product u' * B; the leading element contributes B_(col) itself
      nr_type_t dot = B_(col);
      for (int row = col + 1; row < N; ++row) {
        dot += cond_conj (A_(row, col)) * B_(row);
      }
      // z - T * f * u
      dot *= cond_conj (T_(col));
      B_(col) -= dot;
      for (int row = col + 1; row < N; ++row) {
        B_(row) -= dot * A_(row, col);
      }
    }
  }

  // back substitution on the upper triangle of A, last row first
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = B_(row);
    for (int col = row + 1; col < N; ++col) {
      acc -= A_(row, col) * X_(cMap[col]);
    }
    // diagonal entries not exceeding machine epsilon yield a zero component
    if (abs (A_(row, row)) > std::numeric_limits<nr_double_t>::epsilon()) {
      X_(cMap[row]) = acc / A_(row, row);
    }
    else {
      X_(cMap[row]) = 0;
    }
  }
}
// Substitution step of the LU solver in Doolittle's definition.  A holds
// both factors; X receives the intermediate vector Y first and the final
// solution afterwards.  B is read through the row permutation rMap.
void eqnsys<nr_type_t>::substitute_lu_doolittle (void) {
  // forward substitution in order to solve LY = B
  for (int row = 0; row < N; ++row) {
    nr_type_t acc = B_(rMap[row]);
    for (int col = 0; col < row; ++col) {
      acc -= A_(row, col) * X_(col);
    }
    // no division here: the diagonal of L consists of ones in Doolittle's
    // definition
    X_(row) = acc;
  }

  // backward substitution in order to solve UX = Y
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = X_(row);
    for (int col = row + 1; col < N; ++col) {
      acc -= A_(row, col) * X_(col);
    }
    X_(row) = acc / A_(row, row);
  }
}
// Substitution step of the SVD solver.  Computes U'B divided by the
// singular values into R and then multiplies by V to obtain X.
void eqnsys<nr_type_t>::substitute_svd (void) {
  // calculate U'B, one column of U at a time
  for (int col = 0; col < N; ++col) {
    nr_type_t acc;
    acc = 0.0;
    // a zero singular value leaves the corresponding component at zero
    if (S_(col) != 0.0) {
      for (int row = 0; row < N; ++row) {
        acc += cond_conj (U_(row, col)) * B_(row);
      }
      // this is the divide by S
      acc /= S_(col);
    }
    R_(col) = acc;
  }

  // matrix multiply by V to get the final solution
  for (int row = 0; row < N; ++row) {
    nr_type_t acc;
    acc = 0.0;
    for (int col = 0; col < N; ++col) {
      acc += cond_conj (V_(col, row)) * R_(col);
    }
    X_(row) = acc;
  }
}
void test_matrix() { //matrix.h vs. Eigen //for timing int n = (int) 1e6; timeval t0, t1; const int SIZE = 10; //matrix.h matrices Real A[SIZE*SIZE]; Real B[SIZE*SIZE]; Real B0[SIZE*SIZE]; //backup, to initialize Eigen Real b[SIZE]; for (int i=0; i<SIZE*SIZE; i++) { A[i] = (Real) i+1; B[i] = 1; B0[i] = B[i]; } for (int i=0; i<SIZE; i++) { b[i] = 1; } //std::cout << "A=\n"; printMatReal(SIZE,SIZE,A,-1,-1); //std::cout << "B=\n"; printMatReal(SIZE,SIZE,B,-1,-1); //std::cout << "b=\n"; printMatReal(SIZE,1,b,-1,-1); //matrix.h int ri = 2; int ci = 3; int brows = SIZE/2; int bcols = SIZE/2; Real val = 2.0; //Real m = 1.0 + 1e-6; Real m = 1.0; gettimeofday(&t0, NULL); for (int i=0; i<n; i++) { //setMat(SIZE,SIZE,val,B); //setMatRow(SIZE,SIZE,ri,val,B); //setMatCol(SIZE,ci,val,B); //setMatBlock(SIZE,ri,ci,brows,bcols,val,B); //mulcMat(SIZE,SIZE,m,B); //mulcMatRow(SIZE,SIZE,ri,m,B); //mulcMatCol(SIZE,ci,m,B); //mulcMatBlock(SIZE,ri,ci,brows,bcols,m,B); //copyMat(SIZE,SIZE,A,B); //copyMatRow(SIZE,SIZE,ri,A,SIZE,ri,B); //copyMatCol(SIZE,ci,A,ci,B); //copyMatBlock(SIZE,ri,ci,brows,bcols,A, SIZE,ri,ci,B); //copyTMat(SIZE,SIZE,A,B); //addmMat(SIZE,SIZE,A,m,B); //addmMatRow(SIZE,SIZE,ri,A,SIZE,ri,m,B); //addmMatCol(SIZE,ci,A,ci,m,B); //addmMatBlock(SIZE,ri,ci,brows,bcols,A, SIZE,ri,ci,m,B); //multMatVec(SIZE,SIZE,A,b,m,B); //multMatTVec(SIZE,SIZE,A,b,m,B); //multMatMat(SIZE,SIZE,A,SIZE,A,m,B); multMatTMat(SIZE,SIZE,A,SIZE,A,m,B); } gettimeofday(&t1, NULL); std::cout << "(matrix.h) B=\n"; printMatReal(SIZE,SIZE,B,0,-1); std::cout << "iterations: " << (Real) n << std::endl; std::cout << "clock (sec): " << tosec(t1)-tosec(t0) << std::endl; std::cout << std::endl; if (1) { //Eigen //dynamic Eigen matrices Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> A_(SIZE,SIZE); Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> B_(SIZE,SIZE); Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> b_(SIZE,1); //fixed Eigen matrices //Eigen::Matrix<Real,SIZE,SIZE> A_; //Eigen::Matrix<Real,SIZE,SIZE> 
B_; //Eigen::Matrix<Real,SIZE,1> b_; memcpy(A_.data(), A, sizeof(Real)*SIZE*SIZE); memcpy(B_.data(), B0, sizeof(Real)*SIZE*SIZE); memcpy(b_.data(), b, sizeof(Real)*SIZE); //std::cout << "Eigen:\n"; //std::cout << "A_=\n" << A_ << std::endl; //std::cout << "B_=\n" << B_ << std::endl; //std::cout << "b_=\n" << b_ << std::endl; gettimeofday(&t0, NULL); for (int i=0; i<n; i++) { //B_.setConstant(val); //B_.row(ri).setConstant(val); //B_.col(ci).setConstant(val); //B_.block(ri,ci,brows,bcols).setConstant(val); //B_ *= m; //B_.row(ri) *= m; //B_.col(ci) *= m; //B_.block(ri,ci,brows,bcols) *= m; //B_ = A_; //B_.row(ri) = A_.row(ri); //B_.col(ci) = A_.col(ci); //B_.block(ri,ci,brows,bcols) = A_.block(ri,ci,brows,bcols); //B_ = A_.transpose(); //B_ += (A_ * m); //B_.row(ri) += (A_.row(ri) * m); //B_.col(ci) += (A_.col(ci) * m); //B_.block(ri,ci,brows,bcols) += (A_.block(ri,ci,brows,bcols) * m); //B_.col(0) = A_*b_; //B_.col(0) = A_.transpose()*b_; //B_ = A_*A_; B_ = A_.transpose()*A_; } gettimeofday(&t1, NULL); std::cout << "(Eigen) B=\n" << B_ << std::endl; std::cout << "iterations: " << (Real) n << std::endl; std::cout << "clock (sec): " << tosec(t1)-tosec(t0) << std::endl; } }
static int gk104_top_oneinit(struct nvkm_top *top) { struct nvkm_subdev *subdev = &top->subdev; struct nvkm_device *device = subdev->device; struct nvkm_top_device *info = NULL; u32 data, type, inst; int i; for (i = 0; i < 64; i++) { if (!info) { if (!(info = nvkm_top_device_new(top))) return -ENOMEM; type = ~0; inst = 0; } data = nvkm_rd32(device, 0x022700 + (i * 0x04)); nvkm_trace(subdev, "%02x: %08x\n", i, data); switch (data & 0x00000003) { case 0x00000000: /* NOT_VALID */ continue; case 0x00000001: /* DATA */ inst = (data & 0x3c000000) >> 26; info->addr = (data & 0x00fff000); info->fault = (data & 0x000000f8) >> 3; break; case 0x00000002: /* ENUM */ if (data & 0x00000020) info->engine = (data & 0x3c000000) >> 26; if (data & 0x00000010) info->runlist = (data & 0x01e00000) >> 21; if (data & 0x00000008) info->intr = (data & 0x000f8000) >> 15; if (data & 0x00000004) info->reset = (data & 0x00003e00) >> 9; break; case 0x00000003: /* ENGINE_TYPE */ type = (data & 0x7ffffffc) >> 2; break; } if (data & 0x80000000) continue; /* Translate engine type to NVKM engine identifier. */ #define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A #define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1) \ info->index = NVKM_ENGINE_##A##0 + inst switch (type) { case 0x00000000: A_(GR ); break; case 0x00000001: A_(CE0 ); break; case 0x00000002: A_(CE1 ); break; case 0x00000003: A_(CE2 ); break; case 0x00000008: A_(MSPDEC); break; case 0x00000009: A_(MSPPP ); break; case 0x0000000a: A_(MSVLD ); break; case 0x0000000b: A_(MSENC ); break; case 0x0000000c: A_(VIC ); break; case 0x0000000d: A_(SEC ); break; case 0x0000000e: B_(NVENC ); break; case 0x0000000f: A_(NVENC1); break; case 0x00000010: A_(NVDEC ); break; case 0x00000013: B_(CE ); break; break; default: break; } nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d " "engine %2d runlist %2d intr %2d " "reset %2d\n", type, inst, info->index == NVKM_SUBDEV_NR ? 
NULL : nvkm_subdev_name[info->index], info->addr, info->fault, info->engine, info->runlist, info->intr, info->reset); info = NULL; } return 0; }
// Solves the equation system iteratively using successive over-relaxation
// with an adaptive relaxation factor.
//
// The system is first conditioned (ensure_diagonal, preconditioner) and
// normalized so that A has ones on its diagonal; A and B are modified in
// place.  The current content of X serves as the initial guess.  At most N
// iterations are run.  If the iteration does not converge, or a non-finite
// value shows up, an error is logged and solve_lu_crout() is used instead.
void eqnsys<nr_type_t>::solve_sor (void) {
  nr_type_t f;
  int error, conv, i, c, r;
  int MaxIter = N; // -> less than N^3 operations
  nr_double_t reltol = 1e-4;
  nr_double_t abstol = NR_TINY;
  nr_double_t diff, crit, l = 1, d, s;

  // ensure that all diagonal values are non-zero
  ensure_diagonal ();

  // try to raise diagonal dominance
  preconditioner ();

  // decide here about possible convergence; the outcome is currently only
  // reported in debug builds and does not change the control flow
  if ((crit = convergence_criteria ()) >= 1) {
#if DEBUG && 0
    logprint (LOG_STATUS, "NOTIFY: convergence criteria: %g >= 1 (%dx%d)\n",
              crit, N, N);
#endif
    //solve_lu ();
    //return;
  }

  // normalize the equation system to have ones on its diagonal
  for (r = 0; r < N; r++) {
    f = A_(r, r);
    assert (f != 0); // singular matrix
    for (c = 0; c < N; c++) A_(r, c) /= f;
    B_(r) /= f;
  }

  i = 0;
  error = 0;
  {
    // The current X vector is a good initial guess for the iteration.  The
    // copy of the previous iterate is an automatic object, so it is released
    // on every exit path; the enclosing scope ends before the LU fallback,
    // which keeps the release point where the explicit delete used to be.
    tvector<nr_type_t> Xprev (*X);

    // start iterating here
    do {
      // compute new solution vector: rows above r already use the updated
      // values, rows below r still use the previous iterate
      for (r = 0; r < N; r++) {
        for (f = 0, c = 0; c < N; c++) {
          if (c < r)      f += A_(r, c) * X_(c);
          else if (c > r) f += A_(r, c) * Xprev.get (c);
        }
        X_(r) = (1 - l) * Xprev.get (r) + l * (B_(r) - f);
      }

      // check for convergence; d and s accumulate the differences and
      // magnitudes of the rows inspected before the loop stops
      for (s = 0, d = 0, conv = 1, r = 0; r < N; r++) {
        diff = abs (X_(r) - Xprev.get (r));
        if (diff >= abstol + reltol * abs (X_(r))) {
          conv = 0;
          break;
        }
        d += diff;
        s += abs (X_(r));
        // a NaN difference fails the comparison above and is caught here
        if (!std::isfinite (diff)) {
          error++;
          break;
        }
      }

      if (!error) {
        // adjust relaxation based on average errors
        if ((s == 0 && d == 0) || d >= abstol * N + reltol * s) {
          // values <= 1 -> non-convergence to convergence
          if (l >= 0.6) l -= 0.1;
          if (l >= 1.0) l = 1.0;
        }
        else {
          // values >= 1 -> faster convergence
          if (l < 1.5) l += 0.01;
          if (l < 1.0) l = 1.0;
        }
      }

      // save last values
      Xprev = *X;
    }
    while (++i < MaxIter && !conv);
  }

  if (!conv || error) {
    logprint (LOG_ERROR,
              "WARNING: no convergence after %d sor iterations (l = %g)\n",
              i, l);
    solve_lu_crout ();
  }
#if DEBUG && 0
  else {
    logprint (LOG_STATUS, "NOTIFY: sor convergence after %d iterations\n", i);
  }
#endif
}
// Solves the equation system iteratively with either the Gauss-Seidel or
// the Jacobi method, selected by the 'algo' member.
//
// The system is first conditioned (ensure_diagonal, preconditioner) and
// normalized so that A has ones on its diagonal; A and B are modified in
// place.  The current content of X serves as the initial guess.  At most N
// iterations are run.  If the iteration does not converge, or a non-finite
// value shows up, an error is logged and solve_lu_crout() is used instead.
void eqnsys<nr_type_t>::solve_iterative (void) {
  nr_type_t f;
  int error, conv, i, c, r;
  int MaxIter = N; // -> less than N^3 operations
  nr_double_t reltol = 1e-4;
  nr_double_t abstol = NR_TINY;
  nr_double_t diff, crit;

  // ensure that all diagonal values are non-zero
  ensure_diagonal ();

  // try to raise diagonal dominance
  preconditioner ();

  // decide here about possible convergence; the outcome is currently only
  // reported in debug builds and does not change the control flow
  if ((crit = convergence_criteria ()) >= 1) {
#if DEBUG && 0
    logprint (LOG_STATUS, "NOTIFY: convergence criteria: %g >= 1 (%dx%d)\n",
              crit, N, N);
#endif
    //solve_lu ();
    //return;
  }

  // normalize the equation system to have ones on its diagonal
  for (r = 0; r < N; r++) {
    f = A_(r, r);
    assert (f != 0); // singular matrix
    for (c = 0; c < N; c++) A_(r, c) /= f;
    B_(r) /= f;
  }

  // the method does not change while iterating, so decide once
  const bool gaussSeidel = (algo == ALGO_GAUSS_SEIDEL);

  i = 0;
  error = 0;
  {
    // The current X vector is a good initial guess for the iteration.  The
    // copy of the previous iterate is an automatic object, so it is released
    // on every exit path; the enclosing scope ends before the LU fallback,
    // which keeps the release point where the explicit delete used to be.
    tvector<nr_type_t> Xprev (*X);

    // start iterating here
    do {
      // compute new solution vector
      for (r = 0; r < N; r++) {
        for (f = 0, c = 0; c < N; c++) {
          if (gaussSeidel) {
            // Gauss-Seidel: rows above r already use the updated values
            if (c < r)      f += A_(r, c) * X_(c);
            else if (c > r) f += A_(r, c) * Xprev.get (c);
          }
          else {
            // Jacobi: only values of the previous iterate are used
            if (c != r) f += A_(r, c) * Xprev.get (c);
          }
        }
        X_(r) = B_(r) - f;
      }

      // check for convergence
      for (conv = 1, r = 0; r < N; r++) {
        diff = abs (X_(r) - Xprev.get (r));
        if (diff >= abstol + reltol * abs (X_(r))) {
          conv = 0;
          break;
        }
        // a NaN difference fails the comparison above and is caught here
        if (!std::isfinite (diff)) {
          error++;
          break;
        }
      }

      // save last values
      Xprev = *X;
    }
    while (++i < MaxIter && !conv);
  }

  if (!conv || error) {
    logprint (LOG_ERROR,
              "WARNING: no convergence after %d %s iterations\n",
              i, algo == ALGO_JACOBI ? "jacobi" : "gauss-seidel");
    solve_lu_crout ();
  }
#if DEBUG && 0
  else {
    logprint (LOG_STATUS,
              "NOTIFY: %s convergence after %d iterations\n",
              algo == ALGO_JACOBI ? "jacobi" : "gauss-seidel", i);
  }
#endif
}
void Flush() { if( numQueued_ == 0 ) return; auto YActive = Y_( ALL, IR(0,numQueued_) ); Matrix<Base<Field>> colNorms; if( useTranspose_ ) { // TODO(poulson): Add this as an option /* Timer timer; timer.Start(); BatchTransposedSparseToCoordinates ( NTrans_, YActive, VCand_, blocksize_ ); const double transformTime = timer.Stop(); const double n = YActive.Height(); const double transformGflops = double(numQueued_)*n*n/(1.e9*transformTime); Output (numQueued_," transforms: ",timer.Stop()," seconds (", transformGflops," GFlop/s"); timer.Start(); colNorms = BatchTransposedCoordinatesToNorms ( d_, NTrans_, VCand_, insertionBound_ ); const double normTime = timer.Stop(); const double normGflops = double(numQueued_)*n*n/(1.e9*normTime); Output (numQueued_," norms: ",timer.Stop()," seconds (", normGflops," GFlop/s"); */ BatchTransposedSparseToCoordinates ( NTrans_, YActive, VCand_, blocksize_ ); colNorms = BatchTransposedCoordinatesToNorms ( d_, NTrans_, VCand_, insertionBound_ ); } else { BatchSparseToCoordinates( N_, YActive, VCand_, blocksize_ ); colNorms = BatchCoordinatesToNorms( d_, N_, VCand_, insertionBound_ ); } for( Int j=0; j<numQueued_; ++j ) { for( Int k=0; k<insertionBound_; ++k ) { const Base<Field> bNorm = colNorms(j,k); if( bNorm < normUpperBounds_(k) && bNorm != Base<Field>(0) ) { const Range<Int> subInd(k,END); auto y = YActive(subInd,IR(j)); auto vCand = VCand_(subInd,IR(j)); Output ("normUpperBound=",normUpperBounds_(k), ", bNorm=",bNorm,", k=",k); Print( y, "y" ); // Check that the reverse transformation holds Matrix<Field> yCheck; CoordinatesToSparse( N_(subInd,subInd), vCand, yCheck ); yCheck -= y; if( FrobeniusNorm(yCheck) != Base<Field>(0) ) { Print( B_(ALL,subInd), "B" ); Print( d_(subInd,ALL), "d" ); Print( N_(subInd,subInd), "N" ); Print( vCand, "vCand" ); Print( yCheck, "eCheck" ); LogicError("Invalid sparse transformation"); } Copy( vCand, v_ ); Print( v_, "v" ); Matrix<Field> b; Zeros( b, B_.Height(), 1 ); Gemv( NORMAL, Field(1), 
B_(ALL,subInd), v_, Field(0), b ); Print( b, "b" ); normUpperBounds_(k) = bNorm; foundVector_ = true; insertionBound_ = k+1; } // TODO(poulson): Keep track of 'stock' vectors? } } numQueued_ = 0; Zero( Y_ ); }