Exemple #1
0
void eqnsys<nr_type_t>::solve_gauss (void) {
  // Phase 1: reduce A to upper triangular form, applying the same row
  // operations to the right hand side B.
  for (int col = 0; col < N; ++col) {
    // Partial pivoting: locate the entry of largest magnitude in this
    // column, searching from the diagonal downwards.
    nr_double_t largest = 0;
    int pivotRow = col;
    for (int row = col; row < N; ++row) {
      const nr_double_t magnitude = abs (A_(row, col));
      if (magnitude > largest) {
        largest = magnitude;
        pivotRow = row;
      }
    }

    // A column without any non-zero candidate cannot be eliminated.
    assert (largest != 0);

    // Bring the pivot row onto the diagonal (A and B are swapped together).
    if (pivotRow != col) {
      A->exchangeRows (col, pivotRow);
      B->exchangeRows (col, pivotRow);
    }

    // Eliminate the column below the diagonal.  The entries A(row, col)
    // themselves are not overwritten; nothing in this routine reads them
    // again afterwards.
    for (int row = col + 1; row < N; ++row) {
      const nr_type_t factor = A_(row, col) / A_(col, col);
      for (int k = col + 1; k < N; ++k) A_(row, k) -= factor * A_(col, k);
      B_(row) -= factor * B_(col);
    }
  }

  // Phase 2: back substitution, last unknown first.
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t rhs = B_(row);
    for (int k = row + 1; k < N; ++k) rhs -= A_(row, k) * X_(k);
    X_(row) = rhs / A_(row, row);
  }
}
Exemple #2
0
void eqnsys<nr_type_t>::substitute_qr_householder_ls (void) {
  const nr_double_t tiny = std::numeric_limits<nr_double_t>::epsilon();

  // Step 1: forward substitution for R'X = B.  R' is read from A with the
  // indices swapped, and the intermediate result overwrites B in place.
  for (int row = 0; row < N; ++row) {
    nr_type_t acc = B_(row);
    for (int k = 0; k < row; ++k) acc -= A_(k, row) * B_(k);
    // a diagonal entry not larger than machine epsilon yields a zero result
    if (abs (A_(row, row)) > tiny) {
      B_(row) = acc / A_(row, row);
    } else {
      B_(row) = 0;
    }
  }

  // Step 2: apply the Householder reflections, last one first, to form the
  // least square solution QX.  Reflections with a zero T factor are skipped.
  for (int col = N - 1; col >= 0; --col) {
    if (T_(col) != 0) {
      // scalar product u' * B; B(col) enters with an implicit factor of one
      nr_type_t dot = B_(col);
      for (int row = col + 1; row < N; ++row)
        dot += cond_conj (A_(row, col)) * B_(row);
      // B -= T * dot * u
      dot *= T_(col);
      B_(col) -= dot;
      for (int row = col + 1; row < N; ++row) B_(row) -= dot * A_(row, col);
    }
  }

  // Step 3: scatter B into the solution vector through the column map.
  for (int row = 0; row < N; ++row) X_(cMap[row]) = B_(row);
}
	static void evaluate(const Shell &A, const Shell &B,
			     const Shell &C, const Shell &D,
			     double *Q)  {
	    adapter::rysq::Shell A_(A), B_(B), C_(C), D_(D);
	    //rysq::Quartet<rysq::Shell> quartet(A_, B_, C_, D_);
	    boost::array<Center,4> centers = {{
		    A.center(), B.center(), C.center(), D.center() }};
	    rysq::Eri eri(rysq::Quartet<rysq::Shell>(A_, B_, C_, D_));
	    eri(centers, Q);
	}
Exemple #4
0
void eqnsys<nr_type_t>::solve_gauss_jordan (void) {
  // Process one column at a time; when the loop is done B holds the
  // solution of the system.
  for (int col = 0; col < N; ++col) {
    // Partial pivoting: pick the entry of largest magnitude at or below
    // the diagonal of this column.
    nr_double_t largest = 0;
    int pivotRow = col;
    for (int row = col; row < N; ++row) {
      const nr_double_t magnitude = abs (A_(row, col));
      if (magnitude > largest) {
        largest = magnitude;
        pivotRow = row;
      }
    }

    // No usable pivot in this column.
    assert (largest != 0);

    if (pivotRow != col) {
      A->exchangeRows (col, pivotRow);
      B->exchangeRows (col, pivotRow);
    }

    // Scale the pivot row.  Only the entries right of the diagonal and the
    // right hand side are updated; the diagonal itself is left as is.
    const nr_type_t diag = A_(col, col);
    for (int k = col + 1; k < N; ++k) A_(col, k) /= diag;
    B_(col) /= diag;

    // Remove the pivot column's contribution from every other row, above
    // as well as below the diagonal.
    for (int row = 0; row < N; ++row) {
      if (row == col) continue;
      const nr_type_t factor = A_(row, col);
      for (int k = col + 1; k < N; ++k) A_(row, k) -= factor * A_(col, k);
      B_(row) -= factor * B_(col);
    }
  }

  // right hand side is now the solution
  *X = *B;
}
Exemple #5
0
void eqnsys<nr_type_t>::substitute_qrh (void) {
  // Step 1: replace B by Q'B, applying one reflection per column (the last
  // column has none).
  for (int col = 0; col < N - 1; ++col) {
    // scalar product u_k^T * B over the rows from the diagonal downwards
    nr_type_t dot = 0;
    for (int row = col; row < N; ++row)
      dot += cond_conj (A_(row, col)) * B_(row);
    // B -= 2 * dot * u_k
    for (int row = col; row < N; ++row)
      B_(row) -= 2.0 * dot * A_(row, col);
  }

  // Step 2: backward substitution for RX = Q'B.  The diagonal of R is read
  // from the vector R, the strictly upper part from A, and the unknowns are
  // addressed through the column map.
  const nr_double_t tiny = std::numeric_limits<nr_double_t>::epsilon();
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = B_(row);
    for (int k = row + 1; k < N; ++k) acc -= A_(row, k) * X_(cMap[k]);
    // a diagonal entry not larger than machine epsilon yields a zero unknown
    if (abs (R_(row)) > tiny) {
      X_(cMap[row]) = acc / R_(row);
    } else {
      X_(cMap[row]) = 0;
    }
  }
}
Exemple #6
0
void eqnsys<nr_type_t>::substitute_qr_householder (void) {
  // Step 1: replace B by Q'B.  Reflections with a zero T factor are skipped.
  for (int col = 0; col < N; ++col) {
    if (T_(col) != 0) {
      // scalar product u' * B; B(col) enters with an implicit factor of one
      nr_type_t dot = B_(col);
      for (int row = col + 1; row < N; ++row)
        dot += cond_conj (A_(row, col)) * B_(row);
      // B -= conj(T) * dot * u
      dot *= cond_conj (T_(col));
      B_(col) -= dot;
      for (int row = col + 1; row < N; ++row) B_(row) -= dot * A_(row, col);
    }
  }

  // Step 2: backward substitution for RX = Q'B, with R taken from the upper
  // triangle of A and the unknowns addressed through the column map.
  const nr_double_t tiny = std::numeric_limits<nr_double_t>::epsilon();
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = B_(row);
    for (int k = row + 1; k < N; ++k) acc -= A_(row, k) * X_(cMap[k]);
    // a diagonal entry not larger than machine epsilon yields a zero unknown
    if (abs (A_(row, row)) > tiny) {
      X_(cMap[row]) = acc / A_(row, row);
    } else {
      X_(cMap[row]) = 0;
    }
  }
}
Exemple #7
0
void eqnsys<nr_type_t>::substitute_lu_doolittle (void) {
  // Forward pass, LY = B.  The right hand side is read through the row map
  // and the intermediate vector Y is stored in X.
  for (int row = 0; row < N; ++row) {
    nr_type_t acc = B_(rMap[row]);
    for (int k = 0; k < row; ++k) acc -= A_(row, k) * X_(k);
    // no division here: in Doolittle's definition L has ones on its diagonal
    X_(row) = acc;
  }

  // Backward pass, UX = Y, overwriting Y in X from the last row upwards.
  for (int row = N - 1; row >= 0; --row) {
    nr_type_t acc = X_(row);
    for (int k = row + 1; k < N; ++k) acc -= A_(row, k) * X_(k);
    X_(row) = acc / A_(row, row);
  }
}
Exemple #8
0
void eqnsys<nr_type_t>::substitute_svd (void) {
  // Stage 1: R(c) = (column c of U, conjugated where applicable) . B / S(c).
  // Columns whose S entry is exactly zero contribute a zero.
  for (int col = 0; col < N; ++col) {
    nr_type_t proj = 0.0;
    if (S_(col) != 0.0) {
      for (int row = 0; row < N; ++row)
        proj += cond_conj (U_(row, col)) * B_(row);
      // this is the divide by S
      proj /= S_(col);
    }
    R_(col) = proj;
  }

  // Stage 2: combine the scaled projections with the entries V(c, r) to
  // obtain the solution vector.
  for (int row = 0; row < N; ++row) {
    nr_type_t sum = 0.0;
    for (int col = 0; col < N; ++col)
      sum += cond_conj (V_(col, row)) * R_(col);
    X_(row) = sum;
  }
}
Exemple #9
0
void test_matrix() {
	//matrix.h vs. Eigen

	//for timing
	int n = (int) 1e6;
	timeval t0, t1;

	const int SIZE = 10;

	//matrix.h matrices
	Real A[SIZE*SIZE];
	Real B[SIZE*SIZE];
	Real B0[SIZE*SIZE]; //backup, to initialize Eigen
	Real b[SIZE];

	for (int i=0; i<SIZE*SIZE; i++) {
		A[i] = (Real) i+1;
		B[i] = 1;
		B0[i] = B[i];
	}

	for (int i=0; i<SIZE; i++) {
		b[i] = 1;
	}


	//std::cout << "A=\n"; printMatReal(SIZE,SIZE,A,-1,-1);
	//std::cout << "B=\n"; printMatReal(SIZE,SIZE,B,-1,-1);
	//std::cout << "b=\n"; printMatReal(SIZE,1,b,-1,-1);


	//matrix.h
	int ri = 2;
	int ci = 3;
	int brows = SIZE/2;
	int bcols = SIZE/2;
	Real val = 2.0;
	//Real m = 1.0 + 1e-6;
	Real m = 1.0;

	gettimeofday(&t0, NULL);

	for (int i=0; i<n; i++) {
		//setMat(SIZE,SIZE,val,B);
		//setMatRow(SIZE,SIZE,ri,val,B);
		//setMatCol(SIZE,ci,val,B);
		//setMatBlock(SIZE,ri,ci,brows,bcols,val,B);
		
		//mulcMat(SIZE,SIZE,m,B);
		//mulcMatRow(SIZE,SIZE,ri,m,B);
		//mulcMatCol(SIZE,ci,m,B);
		//mulcMatBlock(SIZE,ri,ci,brows,bcols,m,B);
		
		//copyMat(SIZE,SIZE,A,B);
		//copyMatRow(SIZE,SIZE,ri,A,SIZE,ri,B);
		//copyMatCol(SIZE,ci,A,ci,B);
		//copyMatBlock(SIZE,ri,ci,brows,bcols,A, SIZE,ri,ci,B);
		
		//copyTMat(SIZE,SIZE,A,B);
		
		//addmMat(SIZE,SIZE,A,m,B);
		//addmMatRow(SIZE,SIZE,ri,A,SIZE,ri,m,B);
		//addmMatCol(SIZE,ci,A,ci,m,B);
		//addmMatBlock(SIZE,ri,ci,brows,bcols,A, SIZE,ri,ci,m,B);

		//multMatVec(SIZE,SIZE,A,b,m,B);
		//multMatTVec(SIZE,SIZE,A,b,m,B);
		//multMatMat(SIZE,SIZE,A,SIZE,A,m,B);
		multMatTMat(SIZE,SIZE,A,SIZE,A,m,B);

	}
	gettimeofday(&t1, NULL);

	std::cout << "(matrix.h) B=\n"; printMatReal(SIZE,SIZE,B,0,-1);
	std::cout << "iterations: " << (Real) n << std::endl;
	std::cout << "clock (sec): " << tosec(t1)-tosec(t0) << std::endl;
	std::cout << std::endl;


	
	if (1) {
		//Eigen

		//dynamic Eigen matrices
		Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> A_(SIZE,SIZE);
		Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> B_(SIZE,SIZE);
		Eigen::Matrix<Real,Eigen::Dynamic,Eigen::Dynamic> b_(SIZE,1);

		//fixed Eigen matrices
		//Eigen::Matrix<Real,SIZE,SIZE> A_;
		//Eigen::Matrix<Real,SIZE,SIZE> B_;
		//Eigen::Matrix<Real,SIZE,1> b_;

		memcpy(A_.data(), A, sizeof(Real)*SIZE*SIZE);
		memcpy(B_.data(), B0, sizeof(Real)*SIZE*SIZE);
		memcpy(b_.data(), b, sizeof(Real)*SIZE);

		//std::cout << "Eigen:\n";
		//std::cout << "A_=\n" << A_ << std::endl;
		//std::cout << "B_=\n" << B_ << std::endl;
		//std::cout << "b_=\n" << b_ << std::endl;
	
		gettimeofday(&t0, NULL);
		for (int i=0; i<n; i++) {
			//B_.setConstant(val);
			//B_.row(ri).setConstant(val);
			//B_.col(ci).setConstant(val);
			//B_.block(ri,ci,brows,bcols).setConstant(val);
		
			//B_ *= m;
			//B_.row(ri) *= m;
			//B_.col(ci) *= m;
			//B_.block(ri,ci,brows,bcols) *= m;
		
			//B_ = A_;
			//B_.row(ri) = A_.row(ri);
			//B_.col(ci) = A_.col(ci);
			//B_.block(ri,ci,brows,bcols) = A_.block(ri,ci,brows,bcols);
		
			//B_ = A_.transpose();

			//B_ += (A_ * m);
			//B_.row(ri) += (A_.row(ri) * m);
			//B_.col(ci) += (A_.col(ci) * m);
			//B_.block(ri,ci,brows,bcols) += (A_.block(ri,ci,brows,bcols) * m);
		
			//B_.col(0) = A_*b_;
			//B_.col(0) = A_.transpose()*b_;
			//B_ = A_*A_;
			B_ = A_.transpose()*A_;
		}
		gettimeofday(&t1, NULL);

		std::cout << "(Eigen) B=\n" << B_ << std::endl;
		std::cout << "iterations: " << (Real) n << std::endl;
		std::cout << "clock (sec): " << tosec(t1)-tosec(t0) << std::endl;
	}
}
Exemple #10
0
/*
 * Read the 64-word device table starting at register 0x022700 and fill in
 * one nvkm_top_device record per described device.
 *
 * The low two bits of each word select its kind (NOT_VALID, DATA, ENUM or
 * ENGINE_TYPE).  A record is accumulated over consecutive words for as long
 * as bit 31 is set; the first word with bit 31 clear completes it, after
 * which the engine type is translated, the record is logged and a fresh one
 * is started for the next word.
 *
 * Returns 0 on success, or -ENOMEM if nvkm_top_device_new() fails.
 */
static int
gk104_top_oneinit(struct nvkm_top *top)
{
	struct nvkm_subdev *subdev = &top->subdev;
	struct nvkm_device *device = subdev->device;
	struct nvkm_top_device *info = NULL;
	u32 data, type, inst;
	int i;

	for (i = 0; i < 64; i++) {
		/*
		 * No record in progress (first word, or the previous record
		 * was completed): allocate one and reset the per-record state.
		 */
		if (!info) {
			if (!(info = nvkm_top_device_new(top)))
				return -ENOMEM;
			type = ~0;
			inst = 0;
		}

		data = nvkm_rd32(device, 0x022700 + (i * 0x04));
		nvkm_trace(subdev, "%02x: %08x\n", i, data);
		switch (data & 0x00000003) {
		case 0x00000000: /* NOT_VALID */
			/* Skip the word; the current record stays in progress. */
			continue;
		case 0x00000001: /* DATA */
			/* inst: bits 29:26, addr: bits 23:12 (not shifted),
			 * fault: bits 7:3.
			 */
			inst        = (data & 0x3c000000) >> 26;
			info->addr  = (data & 0x00fff000);
			info->fault = (data & 0x000000f8) >> 3;
			break;
		case 0x00000002: /* ENUM */
			/* Bits 5..2 gate which of the four fields are taken. */
			if (data & 0x00000020)
				info->engine  = (data & 0x3c000000) >> 26;
			if (data & 0x00000010)
				info->runlist = (data & 0x01e00000) >> 21;
			if (data & 0x00000008)
				info->intr    = (data & 0x000f8000) >> 15;
			if (data & 0x00000004)
				info->reset   = (data & 0x00003e00) >> 9;
			break;
		case 0x00000003: /* ENGINE_TYPE */
			/* type: bits 30:2 */
			type = (data & 0x7ffffffc) >> 2;
			break;
		}

		/* Bit 31 set: keep accumulating into the same record. */
		if (data & 0x80000000)
			continue;

		/* Translate engine type to NVKM engine identifier. */
		/* A_: single-instance engine, only matched for instance 0. */
		/* B_: multi-instance engine, index offset by the instance and
		 * range-checked against the matching _LAST identifier.
		 */
#define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A
#define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1)      \
		info->index = NVKM_ENGINE_##A##0 + inst
		switch (type) {
		case 0x00000000: A_(GR    ); break;
		case 0x00000001: A_(CE0   ); break;
		case 0x00000002: A_(CE1   ); break;
		case 0x00000003: A_(CE2   ); break;
		case 0x00000008: A_(MSPDEC); break;
		case 0x00000009: A_(MSPPP ); break;
		case 0x0000000a: A_(MSVLD ); break;
		case 0x0000000b: A_(MSENC ); break;
		case 0x0000000c: A_(VIC   ); break;
		case 0x0000000d: A_(SEC   ); break;
		case 0x0000000e: B_(NVENC ); break;
		case 0x0000000f: A_(NVENC1); break;
		case 0x00000010: A_(NVDEC ); break;
		case 0x00000013: B_(CE    ); break;
		default:
			/* Unknown type: info->index is left as it was. */
			break;
		}
		/* The helpers are only meaningful here; do not leak them. */
#undef B_
#undef A_

		/*
		 * NOTE(review): a record whose index is still NVKM_SUBDEV_NR is
		 * printed with a NULL name; this presumably relies on
		 * nvkm_top_device_new() initialising index to that value -
		 * confirm against its definition.
		 */
		nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d "
				   "engine %2d runlist %2d intr %2d "
				   "reset %2d\n", type, inst,
			   info->index == NVKM_SUBDEV_NR ? NULL :
					  nvkm_subdev_name[info->index],
			   info->addr, info->fault, info->engine, info->runlist,
			   info->intr, info->reset);
		/* Record complete; force a new allocation for the next word. */
		info = NULL;
	}

	return 0;
}
Exemple #11
0
/* Solves the equation system by successive over-relaxation (SOR) with an
   adaptive relaxation factor.  The current contents of X serve as the
   initial guess.  At most N sweeps are made; if the iteration has not
   converged by then, or a non-finite difference shows up, an error is
   logged and solve_lu_crout() is called instead.  A and B are modified in
   place (diagonal fix-up, preconditioning and row normalization). */
void eqnsys<nr_type_t>::solve_sor (void) {
  nr_type_t f;
  int error, conv, i, c, r;
  int MaxIter = N; // -> less than N^3 operations
  nr_double_t reltol = 1e-4;
  nr_double_t abstol = NR_TINY;
  nr_double_t diff, crit, l = 1, d, s;

  // ensure that all diagonal values are non-zero
  ensure_diagonal ();

  // try to raise diagonal dominance
  preconditioner ();

  // decide here about possible convergence
  if ((crit = convergence_criteria ()) >= 1) {
#if DEBUG && 0
    logprint (LOG_STATUS, "NOTIFY: convergence criteria: %g >= 1 (%dx%d)\n",
              crit, N, N);
#endif
    //solve_lu ();
    //return;
  }

  // normalize the equation system to have ones on its diagonal
  for (r = 0; r < N; r++) {
    f = A_(r, r);
    assert (f != 0); // singular matrix
    for (c = 0; c < N; c++) A_(r, c) /= f;
    B_(r) /= f;
  }

  // The current X vector is a good initial guess for the iteration.  The
  // copy of the previous iterate is an automatic object, so it is released
  // on every exit path without an explicit delete.
  tvector<nr_type_t> Xprev (*X);

  // start iterating here
  i = 0; error = 0;
  do {
    // compute new solution vector: entries before r come from the current
    // sweep, entries after r from the previous one
    for (r = 0; r < N; r++) {
      for (f = 0, c = 0; c < N; c++) {
        if (c < r)      f += A_(r, c) * X_(c);
        else if (c > r) f += A_(r, c) * Xprev.get (c);
      }
      // blend previous value and new estimate with relaxation factor l
      X_(r) = (1 - l) * Xprev.get (r) + l * (B_(r) - f);
    }
    // check for convergence; the scan stops at the first row that fails,
    // so d and s only cover the rows examined so far
    for (s = 0, d = 0, conv = 1, r = 0; r < N; r++) {
      diff = abs (X_(r) - Xprev.get (r));
      if (diff >= abstol + reltol * abs (X_(r))) {
        conv = 0;
        break;
      }
      d += diff; s += abs (X_(r));
      if (!std::isfinite (diff)) { error++; break; }
    }
    if (!error) {
      // adjust relaxation based on average errors
      if ((s == 0 && d == 0) || d >= abstol * N + reltol * s) {
        // values <= 1 -> non-convergence to convergence
        if (l >= 0.6) l -= 0.1;
        if (l >= 1.0) l = 1.0;
      }
      else {
        // values >= 1 -> faster convergence
        if (l < 1.5) l += 0.01;
        if (l < 1.0) l = 1.0;
      }
    }
    // save last values
    Xprev = *X;
  }
  while (++i < MaxIter && !conv);

  // fall back to the direct solver when the iteration failed
  if (!conv || error) {
    logprint (LOG_ERROR,
              "WARNING: no convergence after %d sor iterations (l = %g)\n",
              i, l);
    solve_lu_crout ();
  }
#if DEBUG && 0
  else {
    logprint (LOG_STATUS,
              "NOTIFY: sor convergence after %d iterations\n", i);
  }
#endif
}
Exemple #12
0
/* Solves the equation system iteratively, using Gauss-Seidel sweeps when
   algo == ALGO_GAUSS_SEIDEL and Jacobi sweeps otherwise.  The current
   contents of X serve as the initial guess.  At most N sweeps are made; if
   the iteration has not converged by then, or a non-finite difference shows
   up, an error is logged and solve_lu_crout() is called instead.  A and B
   are modified in place (diagonal fix-up, preconditioning and row
   normalization). */
void eqnsys<nr_type_t>::solve_iterative (void) {
  nr_type_t f;
  int error, conv, i, c, r;
  int MaxIter = N; // -> less than N^3 operations
  nr_double_t reltol = 1e-4;
  nr_double_t abstol = NR_TINY;
  nr_double_t diff, crit;

  // ensure that all diagonal values are non-zero
  ensure_diagonal ();

  // try to raise diagonal dominance
  preconditioner ();

  // decide here about possible convergence
  if ((crit = convergence_criteria ()) >= 1) {
#if DEBUG && 0
    logprint (LOG_STATUS, "NOTIFY: convergence criteria: %g >= 1 (%dx%d)\n",
              crit, N, N);
#endif
    //solve_lu ();
    //return;
  }

  // normalize the equation system to have ones on its diagonal
  for (r = 0; r < N; r++) {
    f = A_(r, r);
    assert (f != 0); // singular matrix
    for (c = 0; c < N; c++) A_(r, c) /= f;
    B_(r) /= f;
  }

  // The current X vector is a good initial guess for the iteration.  The
  // copy of the previous iterate is an automatic object, so it is released
  // on every exit path without an explicit delete.
  tvector<nr_type_t> Xprev (*X);

  // start iterating here
  i = 0; error = 0;
  do {
    // compute new solution vector
    for (r = 0; r < N; r++) {
      for (f = 0, c = 0; c < N; c++) {
        if (algo == ALGO_GAUSS_SEIDEL) {
          // Gauss-Seidel: values before r come from the current sweep
          if (c < r)      f += A_(r, c) * X_(c);
          else if (c > r) f += A_(r, c) * Xprev.get (c);
        }
        else {
          // Jacobi: all values come from the previous sweep
          if (c != r) f += A_(r, c) * Xprev.get (c);
        }
      }
      X_(r) = B_(r) - f;
    }
    // check for convergence; the scan stops at the first row that fails
    for (conv = 1, r = 0; r < N; r++) {
      diff = abs (X_(r) - Xprev.get (r));
      if (diff >= abstol + reltol * abs (X_(r))) {
        conv = 0;
        break;
      }
      if (!std::isfinite (diff)) { error++; break; }
    }
    // save last values
    Xprev = *X;
  }
  while (++i < MaxIter && !conv);

  // fall back to the direct solver when the iteration failed
  if (!conv || error) {
    logprint (LOG_ERROR,
              "WARNING: no convergence after %d %s iterations\n",
              i, algo == ALGO_JACOBI ? "jacobi" : "gauss-seidel");
    solve_lu_crout ();
  }
#if DEBUG && 0
  else {
    logprint (LOG_STATUS,
              "NOTIFY: %s convergence after %d iterations\n",
              algo == ALGO_JACOBI ? "jacobi" : "gauss-seidel", i);
  }
#endif
}
Exemple #13
0
    void Flush()
    {
        if( numQueued_ == 0 )
            return;

        auto YActive = Y_( ALL, IR(0,numQueued_) );

        Matrix<Base<Field>> colNorms;
        if( useTranspose_ )
        {
            // TODO(poulson): Add this as an option
            /*
            Timer timer;
            timer.Start();
            BatchTransposedSparseToCoordinates
            ( NTrans_, YActive, VCand_, blocksize_ );
            const double transformTime = timer.Stop();
            const double n = YActive.Height();
            const double transformGflops =
              double(numQueued_)*n*n/(1.e9*transformTime);
            Output
            (numQueued_," transforms: ",timer.Stop()," seconds (",
             transformGflops," GFlop/s");
            timer.Start();
            colNorms =
              BatchTransposedCoordinatesToNorms
              ( d_, NTrans_, VCand_, insertionBound_ );
            const double normTime = timer.Stop();
            const double normGflops = double(numQueued_)*n*n/(1.e9*normTime);
            Output
            (numQueued_," norms: ",timer.Stop()," seconds (",
             normGflops," GFlop/s");
            */

            BatchTransposedSparseToCoordinates
            ( NTrans_, YActive, VCand_, blocksize_ );
            colNorms =
              BatchTransposedCoordinatesToNorms
              ( d_, NTrans_, VCand_, insertionBound_ );
        }
        else
        {
            BatchSparseToCoordinates( N_, YActive, VCand_, blocksize_ );
            colNorms =
              BatchCoordinatesToNorms( d_, N_, VCand_, insertionBound_ );
        }

        for( Int j=0; j<numQueued_; ++j )
        {
            for( Int k=0; k<insertionBound_; ++k )
            {
                const Base<Field> bNorm = colNorms(j,k);
                if( bNorm < normUpperBounds_(k) && bNorm != Base<Field>(0) )
                {
                    const Range<Int> subInd(k,END);

                    auto y = YActive(subInd,IR(j));
                    auto vCand = VCand_(subInd,IR(j));

                    Output
                    ("normUpperBound=",normUpperBounds_(k),
                     ", bNorm=",bNorm,", k=",k);
                    Print( y, "y" );

                    // Check that the reverse transformation holds
                    Matrix<Field> yCheck;
                    CoordinatesToSparse( N_(subInd,subInd), vCand, yCheck );
                    yCheck -= y;
                    if( FrobeniusNorm(yCheck) != Base<Field>(0) )
                    {
                        Print( B_(ALL,subInd), "B" );
                        Print( d_(subInd,ALL), "d" );
                        Print( N_(subInd,subInd), "N" );
                        Print( vCand, "vCand" );
                        Print( yCheck, "eCheck" );
                        LogicError("Invalid sparse transformation");
                    }

                    Copy( vCand, v_ );
                    Print( v_, "v" );

                    Matrix<Field> b;
                    Zeros( b, B_.Height(), 1 );
                    Gemv( NORMAL, Field(1), B_(ALL,subInd), v_, Field(0), b );
                    Print( b, "b" );

                    normUpperBounds_(k) = bNorm;
                    foundVector_ = true;
                    insertionBound_ = k+1;
                }
                // TODO(poulson): Keep track of 'stock' vectors?
            }
        }
        numQueued_ = 0;
        Zero( Y_ );
    }