static void mg3P(double **u, double *v, double **r, double a[4],
		 double c[4], int n1, int n2, int n3, int k) {

/*--------------------------------------------------------------------
c-------------------------------------------------------------------*/

/*--------------------------------------------------------------------
c     multigrid V-cycle routine
c-------------------------------------------------------------------*/

    int j;

/*--------------------------------------------------------------------
c     down cycle.
c     restrict the residual from the find grid to the coarse
c-------------------------------------------------------------------*/

    for (k = lt; k >= lb+1; k--) {
	j = k-1;
	rprj3(r[k], m1[k], m2[k], m3[k],
	      r[j], m1[j], m2[j], m3[j], k);
    }

    k = lb;
/*--------------------------------------------------------------------
c     compute an approximate solution on the coarsest grid
c-------------------------------------------------------------------*/
    zero3(u[k], m1[k], m2[k], m3[k]);
    psinv(r[k], u[k], m1[k], m2[k], m3[k], c, k);

    for (k = lb+1; k <= lt-1; k++) {
	j = k-1;
/*--------------------------------------------------------------------
c        prolongate from level k-1  to k
c-------------------------------------------------------------------*/
	zero3(u[k], m1[k], m2[k], m3[k]);
	interp(u[j], m1[j], m2[j], m3[j],
	       u[k], m1[k], m2[k], m3[k], k);
/*--------------------------------------------------------------------
c        compute residual for level k
c-------------------------------------------------------------------*/
	resid(u[k], r[k], r[k], m1[k], m2[k], m3[k], a, k);
/*--------------------------------------------------------------------
c        apply smoother
c-------------------------------------------------------------------*/
	psinv(r[k], u[k], m1[k], m2[k], m3[k], c, k);
    }

    j = lt - 1;
    k = lt;
    interp(u[j], m1[j], m2[j], m3[j], u[lt], n1, n2, n3, k);
    resid(u[lt], v, r[lt], n1, n2, n3, a, k);
    psinv(r[lt], u[lt], n1, n2, n3, c, k);
}
Exemple #2
0
void EBPoissonOp::
levelJacobi(LevelData<EBCellFAB>&       a_phi,
            const LevelData<EBCellFAB>& a_rhs)
{
  CH_TIME("EBPoissonOp::levelJacobi");

  // Overhauled from previous in-place Gauss-Seidel-like method
  // This implementation is very inefficient in terms of memory usage,
  // and could be greatly improved.  It has the advantage, though,
  // of being very simple and easy to verify as correct.

  EBCellFactory factory(m_eblg.getEBISL());
  // Note: this is hardcoded to a single variable (component),
  // like some other code in this class
  LevelData<EBCellFAB> resid(m_eblg.getDBL(), 1, m_ghostCellsRHS, factory);

  residual(resid, a_phi, a_rhs, true);

  LevelData<EBCellFAB> relaxationCoeff(m_eblg.getDBL(), 1, m_ghostCellsRHS, factory);
  getJacobiRelaxCoeff(relaxationCoeff);

  EBLevelDataOps::scale(resid, relaxationCoeff);

  EBLevelDataOps::incr(a_phi, resid, 1.0);
}
Exemple #3
0
void TrdHSegmentR::calChi2(){
  Chi2=0.;
  for(int i=0;i<nTrdRawHit();i++){
    TRDHitRZD rzd=TRDHitRZD(*pTrdRawHit(i));
    Chi2+=pow(resid(rzd.r,rzd.z,rzd.d)/ (0.62/sqrt(12.)),2);
  }
}
Exemple #4
0
size_t
suiolite::capacity () const
{
  int inuse = resid ();
  assert (inuse <= len);
  return len - inuse;
}
Exemple #5
0
int residuals_b_predicts_a(double ax[], double ay[], double bx[], double by[],
			   int use[], int n, double residuals[], double *rms)
{
    resid(ax, ay, bx, by, use, n, residuals, rms, 0);

    return 0;
}
bool
AmesosBTFGlobal_LinearProblem::
rvs()
{
  //  cout << "AmesosBTFGlobal_LinearProblem: NewLHS_" << endl;
  //  cout << *NewLHS_ << endl;

  OldLHS_->Import( *NewLHS_, *Importer2_, Insert );
  int numrhs = OldLHS_->NumVectors();
  std::vector<double> actual_resids( numrhs ), rhs_norm( numrhs );
  Epetra_MultiVector resid( OldLHS_->Map(), numrhs );
  OldMatrix_->Apply( *OldLHS_, resid );
  resid.Update( -1.0, *OldRHS_, 1.0 );
  resid.Norm2( &actual_resids[0] );
  OldRHS_->Norm2( &rhs_norm[0] );
  if (OldLHS_->Comm().MyPID() == 0 ) {
    for (int i=0; i<numrhs; i++ ) {
      std::cout << "Problem " << i << " (in AmesosBTFGlobal): \t" << actual_resids[i]/rhs_norm[i] << std::endl;
    }
  }
  //cout << "AmesosBTFGlobal_LinearProblem: OldLHS_" << endl;
  //cout << *OldLHS_ << endl;

  return true;
}
// Simple Power method algorithm
double power_method(const Epetra_CrsMatrix& A) {  
  // variable needed for iteration
  double lambda = 0.0;
  int niters = A.RowMap().NumGlobalElements()*10;
  double tolerance = 1.0e-10;
  // Create vectors
  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());
  Epetra_Vector resid(A.RowMap());
  // Fill z with random Numbers
  z.Random();
  // variable needed for iteration
  double normz;
  double residual = 0;
  int iter = 0;
  while (iter==0 || (iter < niters && residual > tolerance)) {
    z.Norm2(&normz); // Compute 2-norm of z
    q.Scale(1.0/normz, z);
    A.Multiply(false, q, z); // Compute z = A*q
    q.Dot(z, &lambda); // Approximate maximum eigenvalue
    if (iter%10==0 || iter+1==niters) {
      // Compute A*q - lambda*q every 10 iterations
      resid.Update(1.0, z, -lambda, q, 0.0);
      resid.Norm2(&residual);
      if (q.Map().Comm().MyPID()==0)
	std::cout << "Iter = " << iter << "  Lambda = " << lambda 
	     << "  Two-norm of A*q - lambda*q = " 
	     << residual << std::endl;
    } 
    iter++;
  }
  return(lambda);
}
Exemple #8
0
int TpetraLinearSolver::residual_norm(int whichNorm, Teuchos::RCP<LinSys::Vector> sln, double& norm)
{
  LinSys::Vector resid(rhs_->getMap());
  ThrowRequire(! (sln.is_null()  || rhs_.is_null() ) );

  if (matrix_->isFillActive() )
  {
    // FIXME
    //!matrix_->fillComplete(map_, map_);
    throw std::runtime_error("residual_norm");
  }
  matrix_->apply(*sln, resid);

  LinSys::OneDVector rhs = rhs_->get1dViewNonConst ();
  LinSys::OneDVector res = resid.get1dViewNonConst ();
  for (int i=0; i<rhs.size(); ++i)
    res[i] -= rhs[i];
  if ( whichNorm == 0 )
    norm = resid.normInf();
  else if ( whichNorm == 1 )
    norm = resid.norm1();
  else if ( whichNorm == 2 )
    norm = resid.norm2();
  else
    return 1;

  return 0;
}
Exemple #9
0
/* -----------------------------------------------------------------------------------
 * Solve: M x = b
 *
 */
int
gsl_sparse_matrix_complex_LU_solve(gsl_sparse_matrix_complex *spmat, double *b_real, double *b_imag, double *x_real, double *x_imag)
{
    void *Symbolic, *Numeric;
    int status;

    //gsl_sparse_matrix_complex_print_col(spmat);

    /* --- symbolic factorization --- */

    status = umfpack_zl_symbolic(spmat->n, spmat->n, spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az, &Symbolic, spmat->Control, spmat->Info);
   
    if (status < 0)
        {
            umfpack_zl_report_info(spmat->Control, spmat->Info);
            //umfpack_zl_report_status(spmat->Control, status);
            fprintf(stderr, "%s: umfpack_zl_symbolic failed\n", __PRETTY_FUNCTION__);
        return -1;
        }

        //printf("%s: Symbolic factorization of A: ", __PRETTY_FUNCTION__);
    //umfpack_zl_report_symbolic(Symbolic, spmat->Control);

    /* --- numeric factorization --- */

    status = umfpack_zl_numeric(spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az, Symbolic, &Numeric, spmat->Control, spmat->Info);

    if (status < 0)
    {
            umfpack_zl_report_info(spmat->Control, spmat->Info);
            umfpack_zl_report_status(spmat->Control, status);
            fprintf(stderr, "%s: umfpack_zl_numeric failed", __PRETTY_FUNCTION__);
        return -1;
        }

        //printf("%s: Numeric factorization of A: ", __PRETTY_FUNCTION__);
    //umfpack_zl_report_numeric(Numeric, spmat->Control);    

    /* --- Solve M x = b --- */
    
    status = umfpack_zl_solve(UMFPACK_A, spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az, x_real, x_imag, b_real, b_imag, Numeric, spmat->Control, spmat->Info);

    //umfpack_zl_report_info(spmat->Control, spmat->Info);
        //umfpack_zl_report_status(spmat->Control, status);

        if (status < 0)
        {
            fprintf(stderr, "%s: umfpack_zl_solve failed\n", __PRETTY_FUNCTION__);
        }
       //printf("%s: x (solution of Ax=b): ") ;
    //umfpack_zl_report_vector(spmat->n, x_real, x_imag, spmat->Control);
    
    {
        double rnorm = resid(FALSE, spmat->Ap, spmat->Ai, spmat->Ax, spmat->Az, x_real, x_imag, b_real, b_imag, spmat->n);
           printf ("maxnorm of residual: %g\n\n", rnorm) ;
    }
    
    return 0;
}
Exemple #10
0
bool IterativeSolvers::pcg(const IRCMatrix &A,
                           Vector &x,
                           const Vector &b,
                           const Preconditioner &M) {
    /*!
      Solves Ax=b using the preconditioned conjugate gradient method.
      */
    const idx N = x.getLength();
    real resid(100.0);
    Vector p(N), z(N), q(N);
    real alpha;
    real normr(0);
    real normb = norm(b);
    real rho(0), rho_1(0), beta(0);
    Vector r = b - A * x;
    if (normb == 0.0)
        normb = 1;
    resid = norm(r) / normb;
    if (resid <= IterativeSolvers::toler) {
        IterativeSolvers::toler = resid;
        IterativeSolvers::maxIter = 0;
        return true;
    }
    // MAIN LOOP
    idx i = 1;
    for (; i <= IterativeSolvers::maxIter; i++) {
        M.solveMxb(z, r);
        rho = dot(r, z);
        if (i == 1)
            p = z;
        else {
            beta = rho / rho_1;
            aypx(beta, p, z); // p = beta*p + z;
        }
        // CALCULATES q = A*p AND dp = dot(q,p)
        real dp = multiply_dot(A, p, q);
        alpha = rho / dp;
        normr = 0;
#ifdef USES_OPENMP
        #pragma omp parallel for reduction(+:normr)
#endif
        for (idx j = 0 ; j < N ; ++j) {
            x[j] += alpha * p[j]; // x + alpha(0) * p;
            r[j] -= alpha * q[j]; // r - alpha(0) * q;
            normr += r[j] * r[j];
        }
        normr = sqrt(normr);
        resid = normr / normb;
        if (resid <= IterativeSolvers::toler) {
            IterativeSolvers::toler = resid;
            IterativeSolvers::maxIter = i;
            return true;
        }
        rho_1 = rho;
    }
    IterativeSolvers::toler = resid;
    return false;
}
Exemple #11
0
void
suiolite::grow (size_t ns)
{
  assert (resid () == 0);
  assert (bytes_read == 0);
  xfree (buf);
  len = min<int> (ns, SUIOLITE_MAX_BUFLEN);
  buf = static_cast<char *> (xmalloc (len));
  clear ();
}
//---------------------------------------------------------
void EulerShock2D::Report(bool bForce)
//---------------------------------------------------------
{
  CurvedEuler2D::Report(bForce);

  if (tstep>=1 && tstep <= resid.size()) 
  {
    // calculate residual
    // resid(tstep) = sqrt(sum(sum(sum((Q-oldQ).^2)))/(4*K*Np));
    // resid(tstep) = sqrt(sum(sum(sum((Q-oldQ).^2)))/(4*K*Np))/dt;


    DMat Qresid = sqr(Q-oldQ); double d4KNp = double(4*K*Np);
    resid(tstep) = sqrt(Qresid.sum()/d4KNp);

    if (eScramInlet == sim_type) {
      // scale residual
      resid(tstep) /= dt;
    }
  }
}
Exemple #13
0
DoubleVector RowDoubleMatrix::conjugateGradient(const DoubleVector &B, double epsilon, unsigned int niter, bool printMessages, unsigned int messageStep) const
{
    DoubleVector X(size_, 0.0); // начальное приближение - вектор нулей
    DoubleVector resid(size_); // невязка
    DoubleVector direction; // направление поиска
    DoubleVector temp(size_); // ременное хранилище для обмена данными
    double resid_norm; // норма невязки
    double alpha;
    double beta;

    double resid_resid, resid_resid_new;

    residual(X, B, resid);

    direction = resid;

    resid_norm = resid.norm_2();

    if (printMessages) std::cout << "Начальная невязка: " << resid_norm << std::endl;
    if (resid_norm > epsilon)
    {
        resid_resid = resid * resid;
        for (unsigned int i = 0; i < niter; i++)
        {
            product(direction, temp);
//            std::cout << direction.norm_2() << "    " << temp.norm_2() << std::endl;
            alpha = (resid_resid) / (direction * temp);
            X += alpha * direction;
            resid -= alpha * temp;
            resid_resid_new = resid * resid;
            resid_norm = sqrt(resid_resid_new);
            if (resid_norm <= epsilon)
            {
                if (printMessages)
                    std::cout << "Решение найдено. Итераций: " << i << ", невязка: " << resid_norm << std::endl;
                break;
            }
            if (printMessages && (i % messageStep == 0))
                std::cout << i << ", невязка: " << resid_norm << std::endl;

            beta = (resid_resid_new) / (resid_resid);
            // d = r + d*beta
            direction.scale(beta);
            direction += resid;
            //
            resid_resid = resid_resid_new;
        }
    }
    return X;
}
Exemple #14
0
void
suiolite::rembytes (ssize_t nbytes)
{
  ssize_t rd = resid ();
  assert (rd >= nbytes);
  bool docall = full () && nbytes > 0 && scb;
  int len2 = bep - rp;
  if (nbytes >= len2) {
    nbytes -= len2;
    rp = buf + nbytes;
    dep[1] = dep[0];
    dep[0] = buf; 
  } else {
    rp += nbytes;
  }
  if (docall)
    (*scb) ();
}
Exemple #15
0
int power_method(Epetra_CrsMatrix& A, double &lambda, int niters, 
		 double tolerance, bool verbose) {  

  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());
  Epetra_Vector resid(A.RowMap());

  Epetra_Flops * counter = A.GetFlopCounter();
  if (counter!=0) {
    q.SetFlopCounter(A);
    z.SetFlopCounter(A);
    resid.SetFlopCounter(A);
  }

  // Fill z with random Numbers
  z.Random();

  // variable needed for iteration
  double normz, residual;

  int ierr = 1;

  for (int iter = 0; iter < niters; iter++)
    {
      z.Norm2(&normz); // Compute 2-norm of z
      q.Scale(1.0/normz, z);
      A.Multiply(false, q, z); // Compute z = A*q
      q.Dot(z, &lambda); // Approximate maximum eigenvalue
      if (iter%100==0 || iter+1==niters)
	{
	  resid.Update(1.0, z, -lambda, q, 0.0); // Compute A*q - lambda*q
	  resid.Norm2(&residual);
	  if (verbose) cout << "Iter = " << iter << "  Lambda = " << lambda 
			    << "  Residual of A*q - lambda*q = " 
			    << residual << endl;
	} 
      if (residual < tolerance) {
	ierr = 0;
	break;
      }
    }
  return(ierr);
}
Exemple #16
0
int checkResults(Epetra_RowMatrix * A, Epetra_CrsMatrix * transA, 
                 Epetra_Vector * xexact, bool verbose) {

  int n = A->NumGlobalRows();

  if (n<100) cout << "A transpose = " << endl << *transA << endl;

  Epetra_Vector x1(View,A->OperatorDomainMap(), &((*xexact)[0]));
  Epetra_Vector b1(A->OperatorRangeMap());

  A->SetUseTranspose(true);

  Epetra_Time timer(A->Comm());
  double start = timer.ElapsedTime();
  A->Apply(x1, b1);
  if (verbose) cout << "\nTime to compute b1: matvec with original matrix using transpose flag  = " << timer.ElapsedTime() - start << endl;

  if (n<100) cout << "b1 = " << endl << b1 << endl;
  Epetra_Vector x2(View,transA->OperatorRangeMap(), &((*xexact)[0]));
  Epetra_Vector b2(transA->OperatorDomainMap());
  start = timer.ElapsedTime();
  transA->Multiply(false, x2, b2);
  if (verbose) cout << "\nTime to compute b2: matvec with transpose matrix                      = " << timer.ElapsedTime() - start << endl;

  if (n<100) cout << "b1 = " << endl << b1 << endl;

  double residual;
  Epetra_Vector resid(A->OperatorDomainMap());

  resid.Update(1.0, b1, -1.0, b2, 0.0);
  resid.Norm2(&residual);
  if (verbose) cout << "Norm of b1 - b2 = " << residual << endl;

  int ierr = 0;

  if (residual > 1.0e-10) ierr++;

  if (ierr!=0 && verbose) cerr << "Status: Test failed" << endl;
  else if (verbose) cerr << "Status: Test passed" << endl;

  return(ierr);
}
Exemple #17
0
int
NineNodeMixedQuad::addInertiaLoadToUnbalance(const Vector &accel)
{
  static const int numberGauss = 9 ;
  static const int numberNodes = 9 ;
  static const int ndf = 2 ; 

  int i;

  // check to see if have mass
  int haveRho = 0;
  for (i = 0; i < numberGauss; i++) {
    if (materialPointers[i]->getRho() != 0.0)
      haveRho = 1;
  }

  if (haveRho == 0)
    return 0;

  // Compute mass matrix
  int tangFlag = 1 ;
  formInertiaTerms( tangFlag ) ;

  // store computed RV fro nodes in resid vector
  int count = 0;

  for (i=0; i<numberNodes; i++) {
    const Vector &Raccel = nodePointers[i]->getRV(accel);
    for (int j=0; j<ndf; j++)
      resid(count++) = Raccel(i);
  }

  // create the load vector if one does not exist
  if (load == 0) 
    load = new Vector(numberNodes*ndf);

  // add -M * RV(accel) to the load vector
  load->addMatrixVector(1.0, mass, resid, -1.0);
  
  return 0;
}
void  ZeroLengthContact3D::formResidAndTangent( int tang_flag ) 

{



	// trial displacement vectors

 	Vector DispTrialS(3); // trial disp for slave node

	Vector DispTrialM(3); // trial disp for master node

	// trial frictional force vectors (in local coordinate)

    Vector t_trial(2);

    double TtrNorm;



    // Coulomb friction law surface

	double Phi;     



    int i, j;



    //zero stiffness and residual 

    stiff.Zero( ) ;

    resid.Zero( ) ;

   

	// detect contact and set flag

    ContactFlag = contactDetect(); 



	//opserr<<this->getTag()<< " ZeroLengthContact3D::ContactFlag=" << ContactFlag<<endln;





	if (ContactFlag == 1) // contacted

	{  

       // contact presure;

		pressure = Kn*gap;   // Kn : normal penalty

  

		DispTrialS=nodePointers[0]->getTrialDisp();

        DispTrialM=nodePointers[1]->getTrialDisp();



       //nodal displacements 

        double ul[6];



		ul[0]=DispTrialS(0);

		ul[1]=DispTrialS(1);

		ul[2]=DispTrialS(2);

		ul[3]=DispTrialM(0);

		ul[4]=DispTrialM(1);

		ul[5]=DispTrialM(2);



		t_trial.Zero();

        xi.Zero();	



		for (i=0; i<6; i++){

			xi(0) += T1(i)*ul[i];

			xi(1) += T2(i)*ul[i];

		}



		// Compute trial shear force

		for (i=0; i<2; i++)  t_trial(i)=Kt * (xi(i)-stickPt(i));  //Kt: tangential penalty

        TtrNorm=t_trial.Norm();



        // Coulomb friction law, trial state

		Phi = TtrNorm - (fs * pressure + cohesion);   // add cohesion

		

		if (Phi <= 0 ) { // stick case

  		//opserr<< "stick ...." << endln;



 			if ( tang_flag == 1 ) {

		    // stiff

				for (i=0; i<6; i++) {

					for (j=0; j<6; j++) {

						stiff(i,j) = Kn*(N(i)*N(j)) + Kt*(T1(i)*T1(j)+T2(i)*T2(j));	

					}

				}

			} //endif tang_flag

			// force

			for (i=0; i<6; i++) 

				 resid(i)= (-1*pressure)*N(i) + t_trial(0)*T1(i) + t_trial(1)*T2(i) ;

			//	 resid(i)= (-1*pressure)*N(i) - t_trial(0)*T1(i) - t_trial(1)*T2(i) ;



		} // end if stick

		else {              // slide case, non-symmetric stiff

            ContactFlag=2;  // set the contactFlag for sliding



		//	opserr<< "sliding ...." << endln;



            if ( tang_flag == 1 ) {

				// stiff

				double Pt1, Pt2;

				Pt1=t_trial(0)/TtrNorm;

				Pt2=t_trial(1)/TtrNorm;

				double C1=fs*Kn;

				double C2=Kt*(fs*pressure+cohesion)/TtrNorm;  // add cohesion, sept. 7, 2005



				for (i=0; i<6; i++) {

					for (j=0; j<6; j++) {

						stiff(i,j) = Kn*(N(i)*N(j)) - C1*(Pt1*T1(i)*N(j)+Pt2*T2(i)*N(j))

							    + C2*( (1-Pt1*Pt1)*T1(i)*T1(j) -    Pt1*Pt2 *T1(i)*T2(j)

					            - Pt1*Pt2 *T2(i)*T1(j) + (1-Pt1*Pt2)*T2(i)*T2(j)  );

					} //endfor i

				} //endfor j

			} // endif tang_flag

		

			// force

			double t1, t2;

			t1 = (fs*pressure+cohesion) * t_trial(0)/TtrNorm;  // add cohesion

            t2 = (fs*pressure+cohesion) * t_trial(1)/TtrNorm;  // add cohesion



			//opserr<<"gap=" << gap <<endln;

			//opserr<<"pressure= "<<pressure <<endln;



			for (i=0; i<6; i++) {

				resid(i) = (-1*pressure)*N(i)+t1*T1(i)+t2*T2(i) ;

			//	resid(i) = (-1*pressure)*N(i)-t1*T1(i)-t2*T2(i) ;

			}

		} //endif slide



	}  // endif ContactFlag



	// for NOT contact, do nothing, stiff and resid are zeroes



}
Exemple #19
0
int main(int argc, char *argv[])
{
  int ierr = 0, i, forierr = 0;
#ifdef EPETRA_MPI

  // Initialize MPI

  MPI_Init(&argc,&argv);
  int rank; // My process ID

  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  Epetra_MpiComm Comm( MPI_COMM_WORLD );

#else

  int rank = 0;
  Epetra_SerialComm Comm;

#endif

  bool verbose = false;

  // Check if we should print results to standard out
  if (argc>1) if (argv[1][0]=='-' && argv[1][1]=='v') verbose = true;

  int verbose_int = verbose ? 1 : 0;
  Comm.Broadcast(&verbose_int, 1, 0);
  verbose = verbose_int==1 ? true : false;


  //  char tmp;
  //  if (rank==0) cout << "Press any key to continue..."<< endl;
  //  if (rank==0) cin >> tmp;
  //  Comm.Barrier();

  Comm.SetTracebackMode(0); // This should shut down any error traceback reporting
  int MyPID = Comm.MyPID();
  int NumProc = Comm.NumProc();

  if(verbose && MyPID==0)
    cout << Epetra_Version() << endl << endl;

  if (verbose) cout << "Processor "<<MyPID<<" of "<< NumProc
		    << " is alive."<<endl;

  // Redefine verbose to only print on PE 0
  if(verbose && rank!=0)
		verbose = false;

  int NumMyEquations = 10000;
  long long NumGlobalEquations = (NumMyEquations * NumProc) + EPETRA_MIN(NumProc,3);
  if(MyPID < 3)
    NumMyEquations++;

  // Construct a Map that puts approximately the same Number of equations on each processor

  Epetra_Map Map(NumGlobalEquations, NumMyEquations, 0LL, Comm);

  // Get update list and number of local equations from newly created Map
  vector<long long> MyGlobalElements(Map.NumMyElements());
  Map.MyGlobalElements(&MyGlobalElements[0]);

  // Create an integer vector NumNz that is used to build the Petra Matrix.
  // NumNz[i] is the Number of OFF-DIAGONAL term for the ith global equation on this processor

  vector<int> NumNz(NumMyEquations);

  // We are building a tridiagonal matrix where each row has (-1 2 -1)
  // So we need 2 off-diagonal terms (except for the first and last equation)

  for(i = 0; i < NumMyEquations; i++)
    if((MyGlobalElements[i] == 0) || (MyGlobalElements[i] == NumGlobalEquations - 1))
      NumNz[i] = 1;
    else
      NumNz[i] = 2;

  // Create a Epetra_Matrix

  Epetra_CrsMatrix A(Copy, Map, &NumNz[0]);
  EPETRA_TEST_ERR(A.IndicesAreGlobal(),ierr);
  EPETRA_TEST_ERR(A.IndicesAreLocal(),ierr);

  // Add  rows one-at-a-time
  // Need some vectors to help
  // Off diagonal Values will always be -1


  vector<double> Values(2);
  Values[0] = -1.0;
	Values[1] = -1.0;
	vector<long long> Indices(2);
  double two = 2.0;
  int NumEntries;

  forierr = 0;
  for(i = 0; i < NumMyEquations; i++) {
    if(MyGlobalElements[i] == 0) {
			Indices[0] = 1;
			NumEntries = 1;
		}
    else if (MyGlobalElements[i] == NumGlobalEquations-1) {
			Indices[0] = NumGlobalEquations-2;
			NumEntries = 1;
		}
    else {
			Indices[0] = MyGlobalElements[i]-1;
			Indices[1] = MyGlobalElements[i]+1;
			NumEntries = 2;
		}
		forierr += !(A.InsertGlobalValues(MyGlobalElements[i], NumEntries, &Values[0], &Indices[0])==0);
		forierr += !(A.InsertGlobalValues(MyGlobalElements[i], 1, &two, &MyGlobalElements[i])>0); // Put in the diagonal entry
  }
  EPETRA_TEST_ERR(forierr,ierr);

  // Finish up
  A.FillComplete();
  A.OptimizeStorage();

  Epetra_JadMatrix JadA(A);
  Epetra_JadMatrix JadA1(A);
  Epetra_JadMatrix JadA2(A);

  // Create vectors for Power method

  Epetra_Vector q(Map);
  Epetra_Vector z(Map); z.Random();
  Epetra_Vector resid(Map);

  Epetra_Flops flopcounter;
  A.SetFlopCounter(flopcounter);
  q.SetFlopCounter(A);
  z.SetFlopCounter(A);
  resid.SetFlopCounter(A);
  JadA.SetFlopCounter(A);
  JadA1.SetFlopCounter(A);
  JadA2.SetFlopCounter(A);


  if (verbose) cout << "=======================================" << endl
		    << "Testing Jad using CrsMatrix as input..." << endl
		    << "=======================================" << endl;

  A.ResetFlops();
  powerMethodTests(A, JadA, Map, q, z, resid, verbose);

  // Increase diagonal dominance

  if (verbose) cout << "\n\nIncreasing the magnitude of first diagonal term and solving again\n\n"
		    << endl;


  if (A.MyGlobalRow(0)) {
    int numvals = A.NumGlobalEntries(0);
    vector<double> Rowvals(numvals);
    vector<long long> Rowinds(numvals);
    A.ExtractGlobalRowCopy(0, numvals, numvals, &Rowvals[0], &Rowinds[0]); // Get A[0,0]

    for (i=0; i<numvals; i++) if (Rowinds[i] == 0) Rowvals[i] *= 10.0;

    A.ReplaceGlobalValues(0, numvals, &Rowvals[0], &Rowinds[0]);
  }
  JadA.UpdateValues(A);
  A.ResetFlops();
  powerMethodTests(A, JadA, Map, q, z, resid, verbose);

  if (verbose) cout << "================================================================" << endl
		          << "Testing Jad using Jad matrix as input matrix for construction..." << endl
		          << "================================================================" << endl;
  JadA1.ResetFlops();
  powerMethodTests(JadA1, JadA2, Map, q, z, resid, verbose);

#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return ierr ;
}
//
//  Amesos_TestMultiSolver.cpp reads in a matrix in Harwell-Boeing format, 
//  calls one of the sparse direct solvers, using blocked right hand sides
//  and computes the error and residual.  
//
//  TestSolver ignores the Harwell-Boeing right hand sides, creating
//  random right hand sides instead.  
//
//  Amesos_TestMultiSolver can test either A x = b or A^T x = b.
//  This can be a bit confusing because sparse direct solvers 
//  use compressed column storage - the transpose of Trilinos'
//  sparse row storage.
//
//  Matrices:
//    readA - Serial.  As read from the file.
//    transposeA - Serial.  The transpose of readA.
//    serialA - if (transpose) then transposeA else readA 
//    distributedA - readA distributed to all processes
//    passA - if ( distributed ) then distributedA else serialA
//
//
int Amesos_TestMultiSolver( Epetra_Comm &Comm, char *matrix_file, int numsolves, 
		      SparseSolverType SparseSolver, bool transpose,
		      int special, AMESOS_MatrixType matrix_type ) {


  int iam = Comm.MyPID() ;

  
  //  int hatever;
  //  if ( iam == 0 )  std::cin >> hatever ; 
  Comm.Barrier();


  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  std::string FileName = matrix_file ;
  int FN_Size = FileName.size() ; 
  std::string LastFiveBytes = FileName.substr( EPETRA_MAX(0,FN_Size-5), FN_Size );
  std::string LastFourBytes = FileName.substr( EPETRA_MAX(0,FN_Size-4), FN_Size );
  bool NonContiguousMap = false; 

  if ( LastFiveBytes == ".triU" ) { 
    NonContiguousMap = true; 
    // Call routine to read in unsymmetric Triplet matrix
    EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, false, Comm, readMap, readA, readx, 
						      readb, readxexact, NonContiguousMap ) );
  } else {
    if ( LastFiveBytes == ".triS" ) { 
      NonContiguousMap = true; 
      // Call routine to read in symmetric Triplet matrix
      EPETRA_CHK_ERR( Trilinos_Util_ReadTriples2Epetra( matrix_file, true, Comm, 
							readMap, readA, readx, 
							readb, readxexact, NonContiguousMap ) );
    } else {
      if (  LastFourBytes == ".mtx" ) { 
	EPETRA_CHK_ERR( Trilinos_Util_ReadMatrixMarket2Epetra( matrix_file, Comm, readMap, 
							       readA, readx, readb, readxexact) );
      } else {
	// Call routine to read in HB problem
	Trilinos_Util_ReadHb2Epetra( matrix_file, Comm, readMap, readA, readx, 
						     readb, readxexact) ;
      }
    }
  }

  Epetra_CrsMatrix transposeA(Copy, *readMap, 0);
  Epetra_CrsMatrix *serialA ; 

  if ( transpose ) {
    assert( CrsMatrixTranspose( readA, &transposeA ) == 0 ); 
    serialA = &transposeA ; 
  } else {
    serialA = readA ; 
  }

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);
  Epetra_Map* map_;

  if( NonContiguousMap ) {
    //
    //  map gives us NumMyElements and MyFirstElement;
    //
    int NumGlobalElements =  readMap->NumGlobalElements();
    int NumMyElements = map.NumMyElements();
    int MyFirstElement = map.MinMyGID();
    std::vector<int> MapMap_( NumGlobalElements );
    readMap->MyGlobalElements( &MapMap_[0] ) ;
    Comm.Broadcast( &MapMap_[0], NumGlobalElements, 0 ) ; 
    map_ = new Epetra_Map( NumGlobalElements, NumMyElements, &MapMap_[MyFirstElement], 0, Comm);
  } else {
    map_ = new Epetra_Map( map ) ; 
  }


  // Create Exporter to distribute read-in matrix and vectors
  Epetra_Export exporter(*readMap, *map_);
  Epetra_CrsMatrix A(Copy, *map_, 0);

  Epetra_RowMatrix * passA = 0; 
  Epetra_MultiVector * passx = 0; 
  Epetra_MultiVector * passb = 0;
  Epetra_MultiVector * passxexact = 0;
  Epetra_MultiVector * passresid = 0;
  Epetra_MultiVector * passtmp = 0;

  Epetra_MultiVector x(*map_,numsolves);
  Epetra_MultiVector b(*map_,numsolves);
  Epetra_MultiVector xexact(*map_,numsolves);
  Epetra_MultiVector resid(*map_,numsolves);
  Epetra_MultiVector tmp(*map_,numsolves);

  Epetra_MultiVector serialx(*readMap,numsolves);
  Epetra_MultiVector serialb(*readMap,numsolves);
  Epetra_MultiVector serialxexact(*readMap,numsolves);
  Epetra_MultiVector serialresid(*readMap,numsolves);
  Epetra_MultiVector serialtmp(*readMap,numsolves);

  bool distribute_matrix = ( matrix_type == AMESOS_Distributed ) ; 
  if ( distribute_matrix ) { 
    //
    //  Initialize x, b and xexact to the values read in from the file
    //
    
    A.Export(*serialA, exporter, Add);
    Comm.Barrier();

    assert(A.FillComplete()==0);    
    Comm.Barrier();

    passA = &A; 
    passx = &x; 
    passb = &b;
    passxexact = &xexact;
    passresid = &resid;
    passtmp = &tmp;
  } else { 
    passA = serialA; 
    passx = &serialx; 
    passb = &serialb;
    passxexact = &serialxexact;
    passresid = &serialresid;
    passtmp = &serialtmp;
  }

  passxexact->SetSeed(131) ; 
  passxexact->Random();
  passx->SetSeed(11231) ; 
  passx->Random();

  passb->PutScalar( 0.0 );
  passA->Multiply( transpose, *passxexact, *passb ) ; 

  Epetra_MultiVector CopyB( *passb ) ;

  double Anorm = passA->NormInf() ; 
  SparseDirectTimingVars::SS_Result.Set_Anorm(Anorm) ;

  Epetra_LinearProblem Problem(  (Epetra_RowMatrix *) passA, 
				 (Epetra_MultiVector *) passx, 
				 (Epetra_MultiVector *) passb );

  double max_resid = 0.0;
  for ( int j = 0 ; j < special+1 ; j++ ) { 
    
    Epetra_Time TotalTime( Comm ) ; 
    if ( false ) { 
#ifdef TEST_UMFPACK

      unused code

    } else if ( SparseSolver == UMFPACK ) { 
      UmfpackOO umfpack( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      umfpack.SetTrans( transpose ) ; 
      umfpack.Solve() ; 
#endif
#ifdef TEST_SUPERLU
    } else if ( SparseSolver == SuperLU ) { 
      SuperluserialOO superluserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 

      superluserial.SetPermc( SuperLU_permc ) ; 
      superluserial.SetTrans( transpose ) ; 
      superluserial.SetUseDGSSV( special == 0 ) ; 
      superluserial.Solve() ; 
#endif
#ifdef HAVE_AMESOS_SLUD
    } else if ( SparseSolver == SuperLUdist ) { 
      SuperludistOO superludist( Problem ) ; 
      superludist.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist.Solve( true ) ) ;
#endif 
#ifdef HAVE_AMESOS_SLUD2
    } else if ( SparseSolver == SuperLUdist2 ) { 
      Superludist2_OO superludist2( Problem ) ; 
      superludist2.SetTrans( transpose ) ; 
      EPETRA_CHK_ERR( superludist2.Solve( true ) ) ;
#endif 
#ifdef TEST_SPOOLES
    } else if ( SparseSolver == SPOOLES ) { 
      SpoolesOO spooles( (Epetra_RowMatrix *) passA, 
			 (Epetra_MultiVector *) passx, 
			 (Epetra_MultiVector *) passb ) ; 
    
      spooles.SetTrans( transpose ) ; 
      spooles.Solve() ; 
#endif
#ifdef HAVE_AMESOS_DSCPACK
    } else if ( SparseSolver == DSCPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Dscpack dscpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( dscpack.SetParameters( ParamList ) ); 
    
      EPETRA_CHK_ERR( dscpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_UMFPACK
    } else if ( SparseSolver == UMFPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Umfpack umfpack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( umfpack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( umfpack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( umfpack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_KLU
    } else if ( SparseSolver == KLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Klu klu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( klu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( klu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( klu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( klu.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARAKLETE
    } else if ( SparseSolver == PARAKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Paraklete paraklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( paraklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( paraklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( paraklete.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( paraklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SLUS
    } else if ( SparseSolver == SuperLU ) { 
      Epetra_SLU superluserial( &Problem ) ; 
      EPETRA_CHK_ERR( superluserial.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superluserial.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superluserial.NumericFactorization(  ) ); 

      EPETRA_CHK_ERR( superluserial.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_LAPACK
    } else if ( SparseSolver == LAPACK ) { 
      Teuchos::ParameterList ParamList ;
      ParamList.set( "MaxProcs", -3 );
      Amesos_Lapack lapack( Problem ) ; 
      EPETRA_CHK_ERR( lapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( lapack.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( lapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_TAUCS
    } else if ( SparseSolver == TAUCS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Taucs taucs( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( taucs.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( taucs.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( taucs.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( taucs.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARDISO
    } else if ( SparseSolver == PARDISO ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Pardiso pardiso( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( pardiso.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( pardiso.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( pardiso.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( pardiso.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_PARKLETE
    } else if ( SparseSolver == PARKLETE ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Parklete parklete( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( parklete.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( parklete.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( parklete.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( parklete.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_MUMPS
    } else if ( SparseSolver == MUMPS ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Mumps mumps( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( mumps.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( mumps.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( mumps.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( mumps.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SCALAPACK
    } else if ( SparseSolver == SCALAPACK ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Scalapack scalapack( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( scalapack.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( scalapack.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( scalapack.SymbolicFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.NumericFactorization( ) ); 
      EPETRA_CHK_ERR( scalapack.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLUDIST
    } else if ( SparseSolver == SUPERLUDIST ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superludist superludist( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superludist.SetParameters( ParamList ) ); 

      EPETRA_CHK_ERR( superludist.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superludist.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superludist.Solve( ) ); 
#endif
#ifdef HAVE_AMESOS_SUPERLU
    } else if ( SparseSolver == SUPERLU ) { 
      Teuchos::ParameterList ParamList ;
      Amesos_Superlu superlu( Problem ) ; 
      ParamList.set( "MaxProcs", -3 );
      EPETRA_CHK_ERR( superlu.SetParameters( ParamList ) ); 
      EPETRA_CHK_ERR( superlu.SetUseTranspose( transpose ) ); 
    
      EPETRA_CHK_ERR( superlu.SymbolicFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.NumericFactorization(  ) ); 
      EPETRA_CHK_ERR( superlu.Solve( ) ); 
#endif
#ifdef TEST_SPOOLESSERIAL 
    } else if ( SparseSolver == SPOOLESSERIAL ) { 
      SpoolesserialOO spoolesserial( (Epetra_RowMatrix *) passA, 
				     (Epetra_MultiVector *) passx, 
				     (Epetra_MultiVector *) passb ) ; 
    
      spoolesserial.Solve() ;
#endif
    } else { 
      SparseDirectTimingVars::log_file << "Solver not implemented yet" << std::endl ;
      std::cerr << "\n\n####################  Requested solver not available (Or not tested with blocked RHS) on this platform #####################\n" << std::endl ;
    }

    SparseDirectTimingVars::SS_Result.Set_Total_Time( TotalTime.ElapsedTime() ); 
    //    SparseDirectTimingVars::SS_Result.Set_First_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Middle_Time( 0.0 ); 
    //    SparseDirectTimingVars::SS_Result.Set_Last_Time( 0.0 ); 

    //
    //  Compute the error = norm(xcomp - xexact )
    //
    std::vector <double> error(numsolves) ; 
    double max_error = 0.0;
  
    passresid->Update(1.0, *passx, -1.0, *passxexact, 0.0);

    passresid->Norm2(&error[0]);
    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( error[i] > max_error ) max_error = error[i] ; 
    SparseDirectTimingVars::SS_Result.Set_Error(max_error) ;

    //  passxexact->Norm2(&error[0] ) ; 
    //  passx->Norm2(&error ) ; 

    //
    //  Compute the residual = norm(Ax - b)
    //
    std::vector <double> residual(numsolves) ; 
  
    passtmp->PutScalar(0.0);
    passA->Multiply( transpose, *passx, *passtmp);
    passresid->Update(1.0, *passtmp, -1.0, *passb, 0.0); 
    //    passresid->Update(1.0, *passtmp, -1.0, CopyB, 0.0); 
    passresid->Norm2(&residual[0]);

    for ( int i = 0 ; i< numsolves; i++ ) 
      if ( residual[i] > max_resid ) max_resid = residual[i] ; 


    SparseDirectTimingVars::SS_Result.Set_Residual(max_resid) ;
    
    std::vector <double> bnorm(numsolves); 
    passb->Norm2( &bnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Bnorm(bnorm[0]) ;

    std::vector <double> xnorm(numsolves); 
    passx->Norm2( &xnorm[0] ) ; 
    SparseDirectTimingVars::SS_Result.Set_Xnorm(xnorm[0]) ;


    if ( false && iam == 0 ) { 

      std::cout << " Amesos_TestMutliSolver.cpp " << std::endl ; 
      for ( int i = 0 ; i< numsolves && i < 10 ; i++ ) {
	std::cout << "i=" << i 
	     << " error = " << error[i] 
	     << " xnorm = " << xnorm[i] 
	     << " residual = " << residual[i] 
	     << " bnorm = " << bnorm[i] 
	     << std::endl ; 
      
      }
    
      std::cout << std::endl << " max_resid = " << max_resid ; 
      std::cout << " max_error = " << max_error << std::endl ; 
      std::cout << " Get_residual() again = " << SparseDirectTimingVars::SS_Result.Get_Residual() << std::endl ;

    }
  }
  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;
  delete map_;
  
  Comm.Barrier();

return 0 ;
}
Exemple #21
0
void mglin(float ***u, int n, int ncycle){
/*
  Full Multigrid Algorithm for solution of the steady state heat
  equation with forcing.  On input u[1..n][1..n] contains the
  right-hand side ρ, while on output it returns the solution.  The
  dimension n must be of the form 2j + 1 for some integer j. (j is
  actually the number of grid levels used in the solution, called ng
  below.) ncycle is the number of V-cycles to be used at each level.
*/
  unsigned int j,jcycle,jj,jpost,jpre,nf,ng=0,ngrid,nn;
  /*** setup multigrid jagged arrays ***/
  float ***iu[NGMAX+1];   /* stores solution at each grid level */
  float ***irhs[NGMAX+1]; /* stores rhs at each grid level */
  float ***ires[NGMAX+1]; /* stores residual at each grid level */
  float ***irho[NGMAX+1]; /* stores rhs during intial solution of FMG */
  
  /*** use bitshift to find the number of grid levels, stored in ng ***/
  nn=n;                   
  while (nn >>= 1) ng++;     
  
  /*** some simple input checks ***/
  if (n != 1+(1L << ng)) nrerror("n-1 must be a power of 2 in mglin.");
  if (ng > NGMAX) nrerror("increase NGMAX in mglin.");
  
  /***restrict solution to next coarsest grid (irho[ng-1])***/
  nn=n/2+1;
  ngrid=ng-1;
  irho[ngrid]=f3tensor(1,nn,1,nn,1,nn); 
  rstrct(irho[ngrid],u,nn);/* coarsens rhs (u at this point) to irho on mesh size nn */
  
  /***continue setting up coarser grids down to coarsest level***/
  while (nn > 3) { 
    nn=nn/2+1; 
    irho[--ngrid]=f3tensor(1,nn,1,nn,1,nn);
    rstrct(irho[ngrid],irho[ngrid+1],nn); 
  }
  
  /***now setup and solve coarsest level iu[1],irhs[1] ***/
  nn=3;
  iu[1]=f3tensor(1,nn,1,nn,1,nn);
  irhs[1]=f3tensor(1,nn,1,nn,1,nn);
  slvsml(iu[1],irho[1]);          /* solve the small system directly */
  free_f3tensor(irho[1],1,nn,1,nn,1,nn);
  ngrid=ng;                       /* reset ngrid to original size */

  for (j=2;j<=ngrid;j++) {        /* loop over coarse to fine, starting at level 2 */
    printf("at grid level %d\n",j);
    nn=2*nn-1;                     
    iu[j]=f3tensor(1,nn,1,nn,1,nn);     /* setup grids for lhs,rhs, and residual */
    irhs[j]=f3tensor(1,nn,1,nn,1,nn);
    ires[j]=f3tensor(1,nn,1,nn,1,nn);
    interp(iu[j],iu[j-1],nn);
    /* irho contains rhs except on fine grid where it is in u */
    copy(irhs[j],(j != ngrid ? irho[j] : u),nn); 
    /* v-cycle at current grid level */
    for (jcycle=1;jcycle<=ncycle;jcycle++) {  
      /* nf is # points on finest grid for current v-sweep */
      nf=nn;                                  
      for (jj=j;jj>=2;jj--) {                 
	for (jpre=1;jpre<=NPRE;jpre++)  /* NPRE g-s sweeps on the finest (relatively) scale */
	  relax(iu[jj],iu[jj-1],irhs[jj],nf); //need iu[jj-1] for jacobi
	resid(ires[jj],iu[jj],irhs[jj],nf); /* compute res on finest scale, store in ires */
	nf=nf/2+1;                        /* next coarsest scale */
	rstrct(irhs[jj-1],ires[jj],nf);  /* restrict residuals as rhs of next coarsest scale */
	fill0(iu[jj-1],nf);              /* set the initial solution guess to zero */
      } 
      slvsml(iu[1],irhs[1]);                  /* solve the small problem exactly */
      nf=3;                                   /* fine scale now n=3 */
      for (jj=2;jj<=j;jj++) {                 /* work way back up to current finest grid */
	nf=2*nf-1;                            /* next finest scale */
	addint(iu[jj],iu[jj-1],ires[jj],nf);  /* inter error and add to previous solution guess */
	for (jpost=1;jpost<=NPOST;jpost++)    /* do NPOST g-s sweeps */
	  relax(iu[jj],iu[jj-1],irhs[jj],nf);
      }
    }
  }

  copy(u,iu[ngrid],n);              /* copy solution into input array (implicitly returned) */

  /*** clean up memory ***/
  for (nn=n,j=ng;j>=2;j--,nn=nn/2+1) {       
    free_f3tensor(ires[j],1,nn,1,nn,1,nn);      
    free_f3tensor(irhs[j],1,nn,1,nn,1,nn);
    free_f3tensor(iu[j],1,nn,1,nn,1,nn);
    if (j != ng) free_f3tensor(irho[j],1,nn,1,nn,1,nn);
  }
  free_f3tensor(irhs[1],1,3,1,3,1,3);
  free_f3tensor(iu[1],1,3,1,3,1,3);
}
Exemple #22
0
//get residual with inertia terms
const XC::Vector &XC::Twenty_Node_Brick::getResistingForceIncInertia(void) const
  {
        static XC::Vector res(60);

//        printf("getResistingForceIncInertia()\n");

        int i, j;
        static double a[60];

        for(i=0; i<nenu; i++) {
                const XC::Vector &accel = theNodes[i]->getTrialAccel();
                if( 3 != accel.Size() ) {
                        std::cerr << "XC::Twenty_Node_Brick::getResistingForceIncInertia matrix and vector sizes are incompatable\n";
                        exit(-1);
                }

                a[i*3] = accel(0);
                a[i*3+1] = accel(1);
                a[i*3+2] = accel(2);
        }
        // Compute the current resisting force
        this->getResistingForce();
//        std::cerr<<"K "<<resid<<std::endl;

        // Compute the mass matrix
        this->getMass();

        for(i = 0; i < 60; i++) {
                for(j = 0; j < 60; j++){
                        resid(i) += mass(i,j)*a[j];
                }
        }
//        printf("\n");
        //std::cerr<<"K+M "<<P<<std::endl;


        for(i=0; i<nenu; i++) {
                const XC::Vector &vel = theNodes[i]->getTrialVel();
                if( 3!= vel.Size() ) {
                        std::cerr << "XC::Twenty_Node_Brick::getResistingForceIncInertia matrix and vector sizes are incompatable\n";
                        exit(-1);
                }
                a[i*3] = vel(0);
                a[i*3+1] = vel(1);
                a[i*3+2] = vel(2);
        }

        this->getDamp();

        for(i = 0; i < 60; i++) {
                for(j = 0; j < 60; j++) {
                        resid(i) += damp(i,j)*a[j];
                }
        }
//        std::cerr<<"Pd"<<Pd<<std::endl;

        res = resid;
//        std::cerr<<"res "<<res<<std::endl;

//        exit(-1);
    if(isDead())
      res*=dead_srf;
    return res;
  }
Exemple #23
0
//get residual
const XC::Vector &XC::Twenty_Node_Brick::getResistingForce(void) const
  {
        int i, j, k, k1;
        double xsj;
        static XC::Matrix B(6, 3);
        double volume = 0.;

//        printf("calling getResistingForce()\n");
        resid.Zero();

        //compute basis vectors and local nodal coordinates
        computeBasis( ) ;
        //gauss loop to compute and save shape functions
        for( i = 0; i < nintu; i++ ) {
                // compute Jacobian and global shape functions
                Jacobian3d(i, xsj, 0);
                //volume element to also be saved
                dvolu[i] = wu[i] * xsj ;
                volume += dvolu[i];
        } // end for i
        //printf("volume = %f\n", volume);

        // Loop over the integration points
        for(i = 0; i < nintu; i++) {

                // Get material stress response
                const XC::Vector &sigma = physicalProperties[i]->getStress();

                // Perform numerical integration on internal force
                //P = P + (B^ sigma) * intWt(i)*intWt(j) * detJ;
                //P.addMatrixTransposeVector(1.0, B, sigma, intWt(i)*intWt(j)*detJ);
                for(j = 0; j < nenu; j++) {

                        B(0,0) = shgu[0][j][i];
                        B(0,1) = 0.;
                        B(0,2) = 0.;
                        B(1,0) = 0.;
                        B(1,1) = shgu[1][j][i];
                        B(1,2) = 0.;
                        B(2,0) = 0.;
                        B(2,1) = 0.;
                        B(2,2) = shgu[2][j][i];
                        B(3,0) = shgu[1][j][i];
                        B(3,1) = shgu[0][j][i];
                        B(3,2) = 0.;
                        B(4,0) = 0.;
                        B(4,1) = shgu[2][j][i];
                        B(4,2) = shgu[1][j][i];
                        B(5,0) = shgu[2][j][i];
                        B(5,1) = 0.;
                        B(5,2) = shgu[0][j][i];


                        for(k = 0; k < 3; k++) {
                                for(k1 = 0; k1 < 6; k1++)
                                        resid(j*3+k) += dvolu[i]*(B(k1,k)*sigma(k1));
                        }
                        // Subtract equiv. body forces from the nodes
                        //P = P - (N^ b) * intWt(i)*intWt(j) * detJ;
                        //P.addMatrixTransposeVector(1.0, N, b, -intWt(i)*intWt(j)*detJ);
                        double r = mixtureRho(i);
                        resid(j*3) -= dvolu[i]*(shgu[3][j][i]*r*bf[0]);
                        resid(j*3+1) -= dvolu[i]*(shgu[3][j][i]*r*bf[1]);
                        resid(j*3+2) -= dvolu[i]*(shgu[3][j][i]*r*bf[2]);
                }
        }

        // Subtract other external nodal loads ... P_res = P_int - P_ext
//        std::cerr<<"resid before:"<<resid<<std::endl;

        if(!load.isEmpty())
          resid-= load;

//        std::cerr<<"resid "<<resid<<std::endl;

    if(isDead())
      resid*=dead_srf;
    return resid ;
  }
void  ZeroLengthInterface2D::formLocalResidAndTangent( int tang_flag , int slave, int master1, int master2, int stage)
{
	// trial frictional force vectors (in local coordinate)
    double t_trial;
    double TtrNorm;
    // Coulomb friction law surface
	double Phi;
	int i, j;

	// set the first value to zero
	pressure(slave) = 0;
    t_trial=0;

	// int IsContact;
	// detect contact and set flag
    ContactFlag = contactDetect(slave,master1,master2, stage);

	if (ContactFlag == 1) // contacted
	{
	    // create a vector for converting local matrix to global
        GlobalResidAndTangentOrder(slave, master1, master2);

		// contact presure;
	    pressure(slave) = Kn * normal_gap(slave);  // pressure is positive if in contact

	    double ng = normal_gap(slave);

		t_trial = Kt * (shear_gap(slave) - stored_shear_gap(slave));  // trial shear force

        // Coulomb friction law, trial state
		//TtrNorm=t_trial.Norm();
		TtrNorm = sqrt(t_trial * t_trial);
		Phi = TtrNorm - fc * pressure(slave);

		if (Phi <= 0 ) { // stick case
 			if ( tang_flag == 1 ) {
		    // stiff
				for (i = 0; i < 6; i++) {
					for (j = 0; j < 6; j++) {
                        stiff(loctoglob[i],loctoglob[j]) +=   Kn * (N(i) * N(j)) + Kt * (T(i) * T(j));	//2D
					}
				}
			} //endif tang_flag
			// force
			for (i = 0; i < 6; i++) resid(loctoglob[i]) += pressure(slave) * N(i) + t_trial * T(i);    //2D
		} // end if stick
		else {           // slide case, non-symmetric stiff
            ContactFlag=2;  // set the contactFlag for sliding
            if ( tang_flag == 1 ) {
				// stiff
				for (i = 0; i < 6; i++) {
					for (j = 0; j < 6; j++) {
					stiff(loctoglob[i],loctoglob[j]) += Kn * (N(i) * N(j)) - fc * Kn * (t_trial / TtrNorm) * T(i) * N(j); //2D
					} //endfor i
				} //endfor j
   			    // force
			} // endif tang_flag
            double shear = fc * pressure(slave) * (t_trial/TtrNorm);
			for (i = 0; i < 6; i++) resid(loctoglob[i]) += (pressure(slave) * N(i)) + (shear * T(i)) ;      //2D
		} //endif slide
	}  // endif ContactFlag==1
}
Exemple #25
0
int invIteration(Epetra_CrsMatrix& A, double &lambda, bool verbose) {

  Ifpack_CrsRiluk * M;
  applyInverseSetup(A, M);
  Epetra_Vector q(A.RowMap());
  Epetra_Vector z(A.RowMap());
  Epetra_Vector resid(A.RowMap());

  Epetra_Flops * counter = A.GetFlopCounter();
  if (counter!=0) {
    q.SetFlopCounter(A);
    z.SetFlopCounter(A);
    resid.SetFlopCounter(A);
  }

  // Fill z with random Numbers
  z.Random();

  // variable needed for iteration
  double normz, residual;

  int niters = 100;
  double tolerance = 1.0E-6;
  int ierr = 1;

  for (int iter = 0; iter < niters; iter++)
    {
      if (verbose)
	cout << endl
	     << " ***** Performing step " << iter << " of inverse iteration ***** " << endl;

      z.Norm2(&normz); // Compute 2-norm of z
      q.Scale(1.0/normz, z);
      applyInverse(A, z, q, M, verbose); // Compute z such that Az = q
      q.Dot(z, &lambda); // Approximate maximum eigenvalue
      if (iter%10==0 || iter+1==niters)
	{
	  resid.Update(1.0, z, -lambda, q, 0.0); // Compute A(inv)*q - lambda*q
	  resid.Norm2(&residual);
	  cout << endl
	       << "***** Inverse Iteration Step " << iter+1 << endl
	       << "  Lambda = " << 1.0/lambda << endl
	       << "  Residual of A(inv)*q - lambda*q = "
	       << residual << endl;
	}
      if (residual < tolerance) {
	ierr = 0;
	break;
      }
    }
  // lambda is the largest eigenvalue of A(inv).  1/lambda is smallest eigenvalue of A.
  lambda = 1.0/lambda;

  // Compute A*q - lambda*q explicitly
  A.Multiply(false, q, z);
  resid.Update(1.0, z, -lambda, q, 0.0); // Compute A*q - lambda*q
  resid.Norm2(&residual);
  cout << "  Explicitly computed residual of A*q - lambda*q = " << residual << endl;
  applyInverseDestroy(M);
  return(ierr);
}
Exemple #26
0
//-----------------------------------------------------------------------------
ResourcePackage* Device::create_resource_package(const char* name)
{
	ResourceId resid("package", name);
	return create_resource_package((StringId64) resid.name);
}
int main(int argc, char *argv[]) {

/*-------------------------------------------------------------------------
c k is the current level. It is passed down through subroutine args
c and is NOT global. it is the current iteration
c------------------------------------------------------------------------*/

    int k, it;
    double t, tinit, mflops;
    int nthreads = 1;

/*-------------------------------------------------------------------------
c These arrays are in common because they are quite large
c and probably shouldn''t be allocated on the stack. They
c are always passed as subroutine args. 
c------------------------------------------------------------------------*/
    
    double **u, *v, **r;
    double a[4], c[4];

    double rnm2, rnmu;
    double epsilon = 1.0e-8;
    int n1, n2, n3, nit;
    double verify_value;
    boolean verified;

    int i, j, l;
    FILE *fp;

    timer_clear(T_BENCH);
    timer_clear(T_INIT);

    timer_start(T_INIT);

/*----------------------------------------------------------------------
c Read in and broadcast input data
c---------------------------------------------------------------------*/

    printf("\n\n NAS Parallel Benchmarks 2.3 OpenMP C version"
	   " - MG Benchmark\n\n");

    fp = fopen("mg.input", "r");
    if (fp != NULL) {
	printf(" Reading from input file mg.input\n");
	fscanf(fp, "%d", &lt);
	while(fgetc(fp) != '\n');
	fscanf(fp, "%d%d%d", &nx[lt], &ny[lt], &nz[lt]);
	while(fgetc(fp) != '\n');
	fscanf(fp, "%d", &nit);
	while(fgetc(fp) != '\n');
	for (i = 0; i <= 7; i++) {
	    fscanf(fp, "%d", &debug_vec[i]);
	}
	fclose(fp);
    } else {
	printf(" No input file. Using compiled defaults\n");
    
	lt = LT_DEFAULT;
	nit = NIT_DEFAULT;
	nx[lt] = NX_DEFAULT;
	ny[lt] = NY_DEFAULT;
	nz[lt] = NZ_DEFAULT;

	for (i = 0; i <= 7; i++) {
	    debug_vec[i] = DEBUG_DEFAULT;
	}
    }

    if ( (nx[lt] != ny[lt]) || (nx[lt] != nz[lt]) ) {
	Class = 'U';
    } else if( nx[lt] == 32 && nit == 4 ) {
	Class = 'S';
    } else if( nx[lt] == 64 && nit == 40 ) {
	Class = 'W';
    } else if( nx[lt] == 256 && nit == 20 ) {
	Class = 'B';
    } else if( nx[lt] == 512 && nit == 20 ) {
	Class = 'C';
    } else if( nx[lt] == 256 && nit == 4 ) {
	Class = 'A';
    } else {
	Class = 'U';
    }

/*--------------------------------------------------------------------
c  Use these for debug info:
c---------------------------------------------------------------------
c     debug_vec(0) = 1 !=> report all norms
c     debug_vec(1) = 1 !=> some setup information
c     debug_vec(1) = 2 !=> more setup information
c     debug_vec(2) = k => at level k or below, show result of resid
c     debug_vec(3) = k => at level k or below, show result of psinv
c     debug_vec(4) = k => at level k or below, show result of rprj
c     debug_vec(5) = k => at level k or below, show result of interp
c     debug_vec(6) = 1 => (unused)
c     debug_vec(7) = 1 => (unused)
c-------------------------------------------------------------------*/

    a[0] = -8.0/3.0;
    a[1] =  0.0;
    a[2] =  1.0/6.0;
    a[3] =  1.0/12.0;

    if (Class == 'A' || Class == 'S' || Class =='W') {
/*--------------------------------------------------------------------
c     Coefficients for the S(a) smoother
c-------------------------------------------------------------------*/
	c[0] =  -3.0/8.0;
	c[1] =  1.0/32.0;
	c[2] =  -1.0/64.0;
	c[3] =   0.0;
    } else {
/*--------------------------------------------------------------------
c     Coefficients for the S(b) smoother
c-------------------------------------------------------------------*/
	c[0] =  -3.0/17.0;
	c[1] =  1.0/33.0;
	c[2] =  -1.0/61.0;
	c[3] =   0.0;
    }
    
    lb = 1;

    setup(&n1,&n2,&n3,lt);
      
    /* Allocate the data arrays
     * 3d arrays are flattened and allocated as a contiguous block
     * 4d arrays are allocated as separate 3d blocks
     */
    u = (double **)malloc((lt+1)*sizeof(double *));
    for (l=lt; l >=1; l--)
      u[l] = (double *)malloc(m3[l]*m2[l]*m1[l]*sizeof(double));

    v = (double *)malloc(m3[lt]*m2[lt]*m1[lt]*sizeof(double));

    r = (double **)malloc((lt+1)*sizeof(double *));
    for (l=lt; l >=1; l--)
      r[l] = (double *)malloc(m3[l]*m2[l]*m1[l]*sizeof(double));

    // Array v can be treated using a standard OpenACC data region
#pragma acc data create(v[0:m3[lt]*m2[lt]*m1[lt]]) copyin(a[0:4],c[0:4])
    {

#ifdef _OPENACC
      //****************************************************************
      /* Now manually deep-create arrays u,r on the GPU using the Cray extended
       * runtime API, instead of using a data region
       */
      double **acc_u = (double **)cray_acc_create(u,(lt+1)*sizeof(double *));
      for (l=lt; l >=1; l--) {
	double *acc_ul = (double *)cray_acc_create(u[l],m3[l]*m2[l]*m1[l]*sizeof(double));
	SET_ACC_PTR(acc_u[l], acc_ul);
      }
      double **acc_r = (double **)cray_acc_create(r,(lt+1)*sizeof(double *));
      for (l=lt; l >=1; l--) {
	double *acc_rl = (double *)cray_acc_create(r[l],m3[l]*m2[l]*m1[l]*sizeof(double));
	SET_ACC_PTR(acc_r[l], acc_rl);
      }
    //****************************************************************
#endif /* _OPENACC */

#pragma omp parallel
{
    zero3(u[lt],n1,n2,n3);
}
    zran3(v,n1,n2,n3,nx[lt],ny[lt],lt);

#pragma omp parallel
{
    norm2u3(v,n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]);

#pragma omp single
{
/*    printf("\n norms of random v are\n");
    printf(" %4d%19.12e%19.12e\n", 0, rnm2, rnmu);
    printf(" about to evaluate resid, k= %d\n", lt);*/

    printf(" Size: %3dx%3dx%3d (class %1c)\n",
	   nx[lt], ny[lt], nz[lt], Class);
    printf(" Iterations: %3d\n", nit);
}

    resid(u[lt],v,r[lt],n1,n2,n3,a,lt);
    norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]);

/*c---------------------------------------------------------------------
c     One iteration for startup
c---------------------------------------------------------------------*/
    mg3P(u,v,r,a,c,n1,n2,n3,lt);
    resid(u[lt],v,r[lt],n1,n2,n3,a,lt);

#pragma omp single
    setup(&n1,&n2,&n3,lt);

    zero3(u[lt],n1,n2,n3);
  } /* pragma omp parallel */

    zran3(v,n1,n2,n3,nx[lt],ny[lt],lt);

    timer_stop(T_INIT);
    timer_start(T_BENCH);

#pragma omp parallel firstprivate(nit) private(it)
  {
    resid(u[lt],v,r[lt],n1,n2,n3,a,lt);
    norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]);

    for ( it = 1; it <= nit; it++) {
	mg3P(u,v,r,a,c,n1,n2,n3,lt);
	resid(u[lt],v,r[lt],n1,n2,n3,a,lt);
    }
    norm2u3(r[lt],n1,n2,n3,&rnm2,&rnmu,nx[lt],ny[lt],nz[lt]);

#if defined(_OPENMP)    
#pragma omp master
    nthreads = omp_get_num_threads();
#endif    
  } /* pragma omp parallel */

    timer_stop(T_BENCH);
    t = timer_read(T_BENCH);
    tinit = timer_read(T_INIT);

    verified = FALSE;
    verify_value = 0.0;

    printf(" Initialization time: %15.3f seconds\n", tinit);
    printf(" Benchmark completed\n");

    if (Class != 'U') {
	if (Class == 'S') {
            verify_value = 0.530770700573e-04;
	} else if (Class == 'W') {
            verify_value = 0.250391406439e-17;  /* 40 iterations*/
/*				0.183103168997d-044 iterations*/
	} else if (Class == 'A') {
            verify_value = 0.2433365309e-5;
        } else if (Class == 'B') {
            verify_value = 0.180056440132e-5;
        } else if (Class == 'C') {
            verify_value = 0.570674826298e-06;
	}

	if ( fabs( rnm2 - verify_value ) <= epsilon ) {
            verified = TRUE;
	    printf(" VERIFICATION SUCCESSFUL\n");
	    printf(" L2 Norm is %20.12e\n", rnm2);
	    printf(" Error is   %20.12e\n", rnm2 - verify_value);
	} else {
            verified = FALSE;
	    printf(" VERIFICATION FAILED\n");
	    printf(" L2 Norm is             %20.12e\n", rnm2);
	    printf(" The correct L2 Norm is %20.12e\n", verify_value);
	}
    } else {
	verified = FALSE;
	printf(" Problem size unknown\n");
	printf(" NO VERIFICATION PERFORMED\n");
    }

    if ( t != 0.0 ) {
	int nn = nx[lt]*ny[lt]*nz[lt];
	mflops = 58.*nit*nn*1.0e-6 / t;
    } else {
	mflops = 0.0;
    }

    c_print_results("MG", Class, nx[lt], ny[lt], nz[lt], 
		    nit, nthreads, t, mflops, "          floating point", 
		    verified, NPBVERSION, COMPILETIME,
		    CS1, CS2, CS3, CS4, CS5, CS6, CS7);
// I should probably deep-free the manually deep-created accelerator data here
} //acc end data
}
Exemple #28
0
int main(int argc, char *argv[]) {

#ifdef EPETRA_MPI
  MPI_Init(&argc,&argv);
  Epetra_MpiComm Comm (MPI_COMM_WORLD);
#else
  Epetra_SerialComm Comm;
#endif

  cout << Comm << endl;

  int MyPID = Comm.MyPID();

  bool verbose = false; 
  if (MyPID==0) verbose = true;

  if(argc < 2 && verbose) {
    cerr << "Usage: " << argv[0] 
	 << " HB_filename [level_fill [level_overlap [absolute_threshold [ relative_threshold]]]]" << endl
	 << "where:" << endl
	 << "HB_filename        - filename and path of a Harwell-Boeing data set" << endl
	 << "level_fill         - The amount of fill to use for ILU(k) preconditioner (default 0)" << endl
	 << "level_overlap      - The amount of overlap used for overlapping Schwarz subdomains (default 0)" << endl
	 << "absolute_threshold - The minimum value to place on the diagonal prior to factorization (default 0.0)" << endl
	 << "relative_threshold - The relative amount to perturb the diagonal prior to factorization (default 1.0)" << endl << endl
	 << "To specify a non-default value for one of these parameters, you must specify all" << endl
	 << " preceding values but not any subsequent parameters. Example:" << endl
	 << "ifpackHbSerialMsr.exe mymatrix.hb 1  - loads mymatrix.hb, uses level fill of one, all other values are defaults" << endl
	 << endl;
    return(1);

  }

  // Uncomment the next three lines to debug in mpi mode
  //int tmp;
  //if (MyPID==0) cin >> tmp;
  //Comm.Barrier();

  Epetra_Map * readMap;
  Epetra_CrsMatrix * readA; 
  Epetra_Vector * readx; 
  Epetra_Vector * readb;
  Epetra_Vector * readxexact;
   
  // Call routine to read in HB problem
  Trilinos_Util_ReadHb2Epetra(argv[1], Comm, readMap, readA, readx, readb, readxexact);

  // Create uniform distributed map
  Epetra_Map map(readMap->NumGlobalElements(), 0, Comm);

  // Create Exporter to distribute read-in matrix and vectors

  Epetra_Export exporter(*readMap, map);
  Epetra_CrsMatrix A(Copy, map, 0);
  Epetra_Vector x(map);
  Epetra_Vector b(map);
  Epetra_Vector xexact(map);

  Epetra_Time FillTimer(Comm);
  x.Export(*readx, exporter, Add);
  b.Export(*readb, exporter, Add);
  xexact.Export(*readxexact, exporter, Add);
  Comm.Barrier();
  double vectorRedistributeTime = FillTimer.ElapsedTime();
  A.Export(*readA, exporter, Add);
  Comm.Barrier();
  double matrixRedistributeTime = FillTimer.ElapsedTime() - vectorRedistributeTime;
  assert(A.FillComplete()==0);    
  Comm.Barrier();
  double fillCompleteTime = FillTimer.ElapsedTime() - matrixRedistributeTime;
  if (Comm.MyPID()==0)	{
    cout << "\n\n****************************************************" << endl;
    cout << "\n Vector redistribute  time (sec) = " << vectorRedistributeTime<< endl;
    cout << "    Matrix redistribute time (sec) = " << matrixRedistributeTime << endl;
    cout << "    Transform to Local  time (sec) = " << fillCompleteTime << endl<< endl;
  }
  Epetra_Vector tmp1(*readMap);
  Epetra_Vector tmp2(map);
  readA->Multiply(false, *readxexact, tmp1);

  A.Multiply(false, xexact, tmp2);
  double residual;
  tmp1.Norm2(&residual);
  if (verbose) cout << "Norm of Ax from file            = " << residual << endl;
  tmp2.Norm2(&residual);
  if (verbose) cout << "Norm of Ax after redistribution = " << residual << endl << endl << endl;

  //cout << "A from file = " << *readA << endl << endl << endl;

  //cout << "A after dist = " << A << endl << endl << endl;

  delete readA;
  delete readx;
  delete readb;
  delete readxexact;
  delete readMap;

  Comm.Barrier();

  // Construct ILU preconditioner

  double elapsed_time, total_flops, MFLOPs;
  Epetra_Time timer(Comm);

  int LevelFill = 0;
  if (argc > 2)  LevelFill = atoi(argv[2]);
  if (verbose) cout << "Using Level Fill = " << LevelFill << endl;
  int Overlap = 0;
  if (argc > 3) Overlap = atoi(argv[3]);
  if (verbose) cout << "Using Level Overlap = " << Overlap << endl;
  double Athresh = 0.0;
  if (argc > 4) Athresh = atof(argv[4]);
  if (verbose) cout << "Using Absolute Threshold Value of = " << Athresh << endl;

  double Rthresh = 1.0;
  if (argc > 5) Rthresh = atof(argv[5]);
  if (verbose) cout << "Using Relative Threshold Value of = " << Rthresh << endl;

  Ifpack_IlukGraph * IlukGraph = 0;
  Ifpack_CrsRiluk * ILUK = 0;

  if (LevelFill>-1) {
    elapsed_time = timer.ElapsedTime();
    IlukGraph = new Ifpack_IlukGraph(A.Graph(), LevelFill, Overlap);
    assert(IlukGraph->ConstructFilledGraph()==0);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    if (verbose) cout << "Time to construct ILUK graph = " << elapsed_time << endl;


    Epetra_Flops fact_counter;
  
    elapsed_time = timer.ElapsedTime();
    ILUK = new Ifpack_CrsRiluk(*IlukGraph);
    ILUK->SetFlopCounter(fact_counter);
    ILUK->SetAbsoluteThreshold(Athresh);
    ILUK->SetRelativeThreshold(Rthresh);
    //assert(ILUK->InitValues()==0);
    int initerr = ILUK->InitValues(A);
    if (initerr!=0) cout << Comm << "InitValues error = " << initerr;
    assert(ILUK->Factor()==0);
    elapsed_time = timer.ElapsedTime() - elapsed_time;
    total_flops = ILUK->Flops();
    MFLOPs = total_flops/elapsed_time/1000000.0;
    if (verbose) cout << "Time to compute preconditioner values = " 
		    << elapsed_time << endl
		    << "MFLOPS for Factorization = " << MFLOPs << endl;
    //cout << *ILUK << endl;
  }
  double Condest;
  ILUK->Condest(false, Condest);

  if (verbose) cout << "Condition number estimate for this preconditioner = " << Condest << endl;
  int Maxiter = 500;
  double Tolerance = 1.0E-14;

  Epetra_Vector xcomp(map);
  Epetra_Vector resid(map);

  Epetra_Flops counter;
  A.SetFlopCounter(counter);
  xcomp.SetFlopCounter(A);
  b.SetFlopCounter(A);
  resid.SetFlopCounter(A);
  ILUK->SetFlopCounter(A);

  elapsed_time = timer.ElapsedTime();

  BiCGSTAB(A, xcomp, b, ILUK, Maxiter, Tolerance, &residual, verbose);

  elapsed_time = timer.ElapsedTime() - elapsed_time;
  total_flops = counter.Flops();
  MFLOPs = total_flops/elapsed_time/1000000.0;
  if (verbose) cout << "Time to compute solution = " 
		    << elapsed_time << endl
		    << "Number of operations in solve = " << total_flops << endl
		    << "MFLOPS for Solve = " << MFLOPs<< endl << endl;

  resid.Update(1.0, xcomp, -1.0, xexact, 0.0); // resid = xcomp - xexact

  resid.Norm2(&residual);

  if (verbose) cout << "Norm of the difference between exact and computed solutions = " << residual << endl;

  


  if (ILUK!=0) delete ILUK;
  if (IlukGraph!=0) delete IlukGraph;
				       
#ifdef EPETRA_MPI
  MPI_Finalize() ;
#endif

return 0 ;
}
Exemple #29
0
void Ma57Solver::solve( OoqpVector& rhs_in )
{

  SimpleVector x_ma27(n), rhs_sav(n);
  x_ma27.copyFrom(rhs_in);
  rhs_sav.copyFrom(rhs_in);

	//    int job = 0; // Solve using A
	//    int one = 1;

	//    SimpleVectorHandle work( new SimpleVector(n) );
	//    SimpleVector & rhs = dynamic_cast<SimpleVector &>(rhs_in);

	//    double * drhs = rhs.elements();
	//    double * dwork  = work->elements();

	//    int * iwork = new int[n];

	//    rusage before;
	//    getrusage( RUSAGE_SELF, &before );

	//    FNAME(ma57cd)( &job,       &n,        
	//  	   fact,       &lfact,    ifact,  &lifact,  
	//  	   &one,       drhs,      &n,   
	//  	   dwork,      &n,        iwork, 
	//  	   icntl,      info );


	//    rusage after;
	//    getrusage( RUSAGE_SELF, &after );
	//    cout << "Solution with the factored matrix took "
	//         << (double) (after.ru_utime.tv_sec - before.ru_utime.tv_sec)
	//        + (after.ru_utime.tv_usec - before.ru_utime.tv_usec) / 1000000.0
	//  	 << " seconds.\n";

	//    delete [] iwork;
	int job = 0;
	if( freshFactor ) {
		icntl[8] = 1; // No iterative refinement
	} else {
		icntl[8] = 10; // Iterative refinement
	}    
	// MIKE: are these structure ever released??

	SimpleVectorHandle x( new SimpleVector(n) );
	SimpleVectorHandle resid( new SimpleVector(n) );
	SimpleVectorHandle work( new SimpleVector(5 * n) );
	SimpleVector & rhs = dynamic_cast<SimpleVector &>(rhs_in);

	double * drhs = rhs.elements();
	double * dx   = x->elements();
	double * dresid = resid->elements();
	double * dwork  = work->elements();

	int * iwork = new_iworkn(n);

  
  /*static int s = 0;
  int mype; MPI_Comm_rank(MPI_COMM_WORLD,&mype);
  if (mype == 0 && s==105) {
    printf("RHS IN\n\n");
    for (int i = 0; i < n; i++) {
      printf("%d: %.10E\n", i, rhs[i]);
    }
  }*/

#ifdef HAVE_GETRUSAGE
	rusage before;
	if( gOoqpPrintLevel >= 100 ) {
		getrusage( RUSAGE_SELF, &before );
	}
#endif

	int done = 0;
	int refactorizations = 0;
	int dontRefactor =  (kThresholdPivoting > kThresholdPivotingMax);
  while( !done && refactorizations < 10 ) {

    icntl[9]=1;//condition number

    FNAME(ma57dd)( &job,       &n,        &nnz,   M,        irowM,   jcolM,
        fact,       &lfact,    ifact,  &lifact,  drhs,    dx,
        dresid,      dwork,    iwork,  icntl,    cntl,    info,
        rinfo );
    if( resid->infnorm() < kPrecision*( 1 + rhs.infnorm() ) ) {
      // resids are fine, use them
      done = 1;
      cout << "Ma57: relative norm of residuals for linear system: " 
	   <<  resid->infnorm()/rhs.infnorm() << endl;
      cout << "Ma57: condition number: " << rinfo[10] << " " << rinfo[11] << " " << rinfo[12] << endl;
    } else {
      // resids aren't good enough.
      if( freshFactor ) { // We weren't doing iterative refinement,
        // let's do so
        job = 2;
        icntl[8] = 10;
        // Mark this factorization as stale
        freshFactor = 0;
        // And grow more pessimistic about the next factorization
        if( kThresholdPivoting >= kThresholdPivotingMax ) {
          // We have already refactored as with a high a pivtol as we
          // are willing to use
          dontRefactor = 1; 
        } else {
          // refactor with a higher Threshold Pivoting parameter
          kThresholdPivoting *= kThresholdPivotingFactor;
          if( kThresholdPivoting > kThresholdPivotingMax ) 
            kThresholdPivoting = kThresholdPivotingMax;
          this->setThresholdPivoting();
          cout << "Setting ThresholdPivoting parameter to " 
            << kThresholdPivoting << " for future factorizations" << endl;
        }
      } else if ( dontRefactor ) {
        // We might have tried a refactor, but the pivtol is above our
        // limit.
        done = 1;
      } else {
        // Otherwise, we have already tried iterative refinement, and
        // have already increased the ThresholdPivoting parameter
        cout << "Refactoring with Threshold Pivoting parameter" 
          << kThresholdPivoting << endl;
        this->matrixChanged();
        refactorizations++;
        // be optimistic about the next factorization
        job = 0;
        icntl[8] = 1;
      } // end else we hava already tried iterative refinement
    } // end else resids aren't good enough
  } // end while not done

  //!
  SimpleVector& x_ma57 = *x;  
  ma27->matrixChanged();
  ma27->solve(x_ma27);

  for(int i=0; i<x_ma57.length(); i++)
    if(fabs((x_ma57[i]-x_ma27[i])/(1+x_ma27[i])) >1) {
      printf("[%6d]  ma57:%22.14e  ma27:%22.14e \n", i, x_ma57[i], x_ma27[i]);
    }
  
  SimpleVector resid_ma57(n);
  resid_ma57.copyFrom(rhs_sav);
  sgm_->mult(1.0, resid_ma57.elements(), 1, -1.0, x_ma57.elements(), 1);
  cout << "MA57 resid.nrm=" << resid_ma57.infnorm() << "   rhs.nrm=" << rhs_sav.infnorm() 
       << "   n=" << n << endl;


  SimpleVector resid_ma27(n);
  resid_ma27.copyFrom(rhs_sav);
  sgm_->mult(-1.0, resid_ma27.elements(), 1, 1.0, x_ma27.elements(), 1);
  cout << "MA27 resid.nrm=" << resid_ma27.infnorm() << endl;


  rhs.copyFrom( *x );
  //rhs.copyFrom(x_ma27);

  //delete [] iwork; //it is cached now
}
Exemple #30
0
int main (int argc, char **argv)
{
    double Info [UMFPACK_INFO], Control [UMFPACK_CONTROL], *Ax, *Cx, *Lx, *Ux,
	*W, t [2], *Dx, rnorm, *Rb, *y, *Rs ;
    double *Az, *Lz, *Uz, *Dz, *Cz, *Rbz, *yz ;
    int *Ap, *Ai, *Cp, *Ci, row, col, p, lnz, unz, nr, nc, *Lp, *Li, *Ui, *Up,
	*P, *Q, *Lj, i, j, k, anz, nfr, nchains, *Qinit, fnpiv, lnz1, unz1, nz1,
	status, *Front_npivcol, *Front_parent, *Chain_start, *Wi, *Pinit, n1,
	*Chain_maxrows, *Chain_maxcols, *Front_1strow, *Front_leftmostdesc,
	nzud, do_recip ;
    void *Symbolic, *Numeric ;

    /* ---------------------------------------------------------------------- */
    /* initializations */
    /* ---------------------------------------------------------------------- */

    umfpack_tic (t) ;

    printf ("\nUMFPACK V%d.%d (%s) demo: _zi_ version\n",
	    UMFPACK_MAIN_VERSION, UMFPACK_SUB_VERSION, UMFPACK_DATE) ;

    /* get the default control parameters */
    umfpack_zi_defaults (Control) ;

    /* change the default print level for this demo */
    /* (otherwise, nothing will print) */
    Control [UMFPACK_PRL] = 6 ;

    /* print the license agreement */
    umfpack_zi_report_status (Control, UMFPACK_OK) ;
    Control [UMFPACK_PRL] = 5 ;

    /* print the control parameters */
    umfpack_zi_report_control (Control) ;

    /* ---------------------------------------------------------------------- */
    /* print A and b, and convert A to column-form */
    /* ---------------------------------------------------------------------- */

    /* print the right-hand-side */
    printf ("\nb: ") ;
    (void) umfpack_zi_report_vector (n, b, bz, Control) ;

    /* print the triplet form of the matrix */
    printf ("\nA: ") ;
    (void) umfpack_zi_report_triplet (n, n, nz, Arow, Acol, Aval, Avalz,
	Control) ;

    /* convert to column form */
    nz1 = MAX (nz,1) ;	/* ensure arrays are not of size zero. */
    Ap = (int *) malloc ((n+1) * sizeof (int)) ;
    Ai = (int *) malloc (nz1 * sizeof (int)) ;
    Ax = (double *) malloc (nz1 * sizeof (double)) ;
    Az = (double *) malloc (nz1 * sizeof (double)) ;
    if (!Ap || !Ai || !Ax || !Az)
    {
	error ("out of memory") ;
    }

    status = umfpack_zi_triplet_to_col (n, n, nz, Arow, Acol, Aval, Avalz,
	Ap, Ai, Ax, Az, (int *) NULL) ;

    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_triplet_to_col failed") ;
    }

    /* print the column-form of A */
    printf ("\nA: ") ;
    (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ;

    /* ---------------------------------------------------------------------- */
    /* symbolic factorization */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_symbolic (n, n, Ap, Ai, Ax, Az, &Symbolic,
	Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_info (Control, Info) ;
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_symbolic failed") ;
    }

    /* print the symbolic factorization */

    printf ("\nSymbolic factorization of A: ") ;
    (void) umfpack_zi_report_symbolic (Symbolic, Control) ;

    /* ---------------------------------------------------------------------- */
    /* numeric factorization */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric,
	Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_info (Control, Info) ;
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_numeric failed") ;
    }

    /* print the numeric factorization */
    printf ("\nNumeric factorization of A: ") ;
    (void) umfpack_zi_report_numeric (Numeric, Control) ;

    /* ---------------------------------------------------------------------- */
    /* solve Ax=b */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz,
	Numeric, Control, Info) ;
    umfpack_zi_report_info (Control, Info) ;
    umfpack_zi_report_status (Control, status) ;
    if (status < 0)
    {
	error ("umfpack_zi_solve failed") ;
    }
    printf ("\nx (solution of Ax=b): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (FALSE, Ap, Ai, Ax, Az) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* compute the determinant */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_get_determinant (x, xz, r, Numeric, Info) ;
    umfpack_zi_report_status (Control, status) ;
    if (status < 0)
    {
	error ("umfpack_zi_get_determinant failed") ;
    }
    printf ("determinant: (%g", x [0]) ;
    printf ("+ (%g)i", xz [0]) ; /* complex */
    printf (") * 10^(%g)\n", r [0]) ;

    /* ---------------------------------------------------------------------- */
    /* solve Ax=b, broken down into steps */
    /* ---------------------------------------------------------------------- */

    /* Rb = R*b */
    Rb  = (double *) malloc (n * sizeof (double)) ;
    Rbz = (double *) malloc (n * sizeof (double)) ;
    y   = (double *) malloc (n * sizeof (double)) ;
    yz  = (double *) malloc (n * sizeof (double)) ;
    if (!Rb || !y) error ("out of memory") ;
    if (!Rbz || !yz) error ("out of memory") ;

    status = umfpack_zi_scale (Rb, Rbz, b, bz, Numeric) ;
    if (status < 0) error ("umfpack_zi_scale failed") ;
    /* solve Ly = P*(Rb) */
    status = umfpack_zi_solve (UMFPACK_Pt_L, Ap, Ai, Ax, Az, y, yz, Rb, Rbz,
	Numeric, Control, Info) ;
    if (status < 0) error ("umfpack_zi_solve failed") ;
    /* solve UQ'x=y */
    status = umfpack_zi_solve (UMFPACK_U_Qt, Ap, Ai, Ax, Az, x, xz, y, yz,
	Numeric, Control, Info) ;
    if (status < 0) error ("umfpack_zi_solve failed") ;
    printf ("\nx (solution of Ax=b, solve is split into 3 steps): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (FALSE, Ap, Ai, Ax, Az) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    free (Rb) ;
    free (Rbz) ;
    free (y) ;
    free (yz) ;

    /* ---------------------------------------------------------------------- */
    /* solve A'x=b */
    /* ---------------------------------------------------------------------- */

    /* note that this is the complex conjugate transpose, A' */
    status = umfpack_zi_solve (UMFPACK_At, Ap, Ai, Ax, Az, x, xz, b, bz,
	Numeric, Control, Info) ;
    umfpack_zi_report_info (Control, Info) ;
    if (status < 0)
    {
	error ("umfpack_zi_solve failed") ;
    }
    printf ("\nx (solution of A'x=b): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (TRUE, Ap, Ai, Ax, Az) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* modify one numerical value in the column-form of A */
    /* ---------------------------------------------------------------------- */

    /* change A (1,4), look for row index 1 in column 4. */
    row = 1 ;
    col = 4 ;
    for (p = Ap [col] ; p < Ap [col+1] ; p++)
    {
	if (row == Ai [p])
	{
	    printf ("\nchanging A (%d,%d) to zero\n", row, col) ;
	    Ax [p] = 0.0 ;
	    Az [p] = 0.0 ;
	    break ;
	}
    }
    printf ("\nmodified A: ") ;
    (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ;

    /* ---------------------------------------------------------------------- */
    /* redo the numeric factorization */
    /* ---------------------------------------------------------------------- */

    /* The pattern (Ap and Ai) hasn't changed, so the symbolic factorization */
    /* doesn't have to be redone, no matter how much we change Ax. */

    /* We don't need the Numeric object any more, so free it. */
    umfpack_zi_free_numeric (&Numeric) ;

    /* Note that a memory leak would have occurred if the old Numeric */
    /* had not been free'd with umfpack_zi_free_numeric above. */
    status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric,
	Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_info (Control, Info) ;
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_numeric failed") ;
    }
    printf ("\nNumeric factorization of modified A: ") ;
    (void) umfpack_zi_report_numeric (Numeric, Control) ;

    /* ---------------------------------------------------------------------- */
    /* solve Ax=b, with the modified A */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz,
	Numeric, Control, Info) ;
    umfpack_zi_report_info (Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_solve failed") ;
    }
    printf ("\nx (with modified A): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (FALSE, Ap, Ai, Ax, Az) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* modify all of the numerical values of A, but not the pattern */
    /* ---------------------------------------------------------------------- */

    for (col = 0 ; col < n ; col++)
    {
	for (p = Ap [col] ; p < Ap [col+1] ; p++)
	{
	    row = Ai [p] ;
	    printf ("changing ") ;
	    /* complex: */ printf ("real part of ") ;
	    printf ("A (%d,%d) from %g", row, col, Ax [p]) ;
	    Ax [p] = Ax [p] + col*10 - row ;
	    printf (" to %g\n", Ax [p]) ;
	}
    }
    printf ("\ncompletely modified A (same pattern): ") ;
    (void) umfpack_zi_report_matrix (n, n, Ap, Ai, Ax, Az, 1, Control) ;

    /* ---------------------------------------------------------------------- */
    /* save the Symbolic object to file, free it, and load it back in */
    /* ---------------------------------------------------------------------- */

    /* use the default filename, "symbolic.umf" */
    printf ("\nSaving symbolic object:\n") ;
    status = umfpack_zi_save_symbolic (Symbolic, (char *) NULL) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_save_symbolic failed") ;
    }
    printf ("\nFreeing symbolic object:\n") ;
    umfpack_zi_free_symbolic (&Symbolic) ;
    printf ("\nLoading symbolic object:\n") ;
    status = umfpack_zi_load_symbolic (&Symbolic, (char *) NULL) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_load_symbolic failed") ;
    }
    printf ("\nDone loading symbolic object\n") ;

    /* ---------------------------------------------------------------------- */
    /* redo the numeric factorization */
    /* ---------------------------------------------------------------------- */

    umfpack_zi_free_numeric (&Numeric) ;
    status = umfpack_zi_numeric (Ap, Ai, Ax, Az, Symbolic, &Numeric,
	Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_info (Control, Info) ;
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_numeric failed") ;
    }
    printf ("\nNumeric factorization of completely modified A: ") ;
    (void) umfpack_zi_report_numeric (Numeric, Control) ;

    /* ---------------------------------------------------------------------- */
    /* solve Ax=b, with the modified A */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_solve (UMFPACK_A, Ap, Ai, Ax, Az, x, xz, b, bz,
	Numeric, Control, Info) ;
    umfpack_zi_report_info (Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_solve failed") ;
    }
    printf ("\nx (with completely modified A): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (FALSE, Ap, Ai, Ax, Az) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* free the symbolic and numeric factorization */
    /* ---------------------------------------------------------------------- */

    umfpack_zi_free_symbolic (&Symbolic) ;
    umfpack_zi_free_numeric (&Numeric) ;

    /* ---------------------------------------------------------------------- */
    /* C = transpose of A */
    /* ---------------------------------------------------------------------- */

    Cp = (int *) malloc ((n+1) * sizeof (int)) ;
    Ci = (int *) malloc (nz1 * sizeof (int)) ;
    Cx = (double *) malloc (nz1 * sizeof (double)) ;
    Cz = (double *) malloc (nz1 * sizeof (double)) ;
    if (!Cp || !Ci || !Cx || !Cz)
    {
	error ("out of memory") ;
    }
    status = umfpack_zi_transpose (n, n, Ap, Ai, Ax, Az,
	(int *) NULL, (int *) NULL, Cp, Ci, Cx, Cz, TRUE) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_transpose failed: ") ;
    }
    printf ("\nC (transpose of A): ") ;
    (void) umfpack_zi_report_matrix (n, n, Cp, Ci, Cx, Cz, 1, Control) ;

    /* ---------------------------------------------------------------------- */
    /* symbolic factorization of C */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_symbolic (n, n, Cp, Ci, Cx, Cz, &Symbolic,
	Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_info (Control, Info) ;
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_symbolic failed") ;
    }
    printf ("\nSymbolic factorization of C: ") ;
    (void) umfpack_zi_report_symbolic (Symbolic, Control) ;

    /* ---------------------------------------------------------------------- */
    /* copy the contents of Symbolic into user arrays print them */
    /* ---------------------------------------------------------------------- */

    printf ("\nGet the contents of the Symbolic object for C:\n") ;
    printf ("(compare with umfpack_zi_report_symbolic output, above)\n") ;
    Pinit = (int *) malloc ((n+1) * sizeof (int)) ;
    Qinit = (int *) malloc ((n+1) * sizeof (int)) ;
    Front_npivcol = (int *) malloc ((n+1) * sizeof (int)) ;
    Front_1strow = (int *) malloc ((n+1) * sizeof (int)) ;
    Front_leftmostdesc = (int *) malloc ((n+1) * sizeof (int)) ;
    Front_parent = (int *) malloc ((n+1) * sizeof (int)) ;
    Chain_start = (int *) malloc ((n+1) * sizeof (int)) ;
    Chain_maxrows = (int *) malloc ((n+1) * sizeof (int)) ;
    Chain_maxcols = (int *) malloc ((n+1) * sizeof (int)) ;
    if (!Pinit || !Qinit || !Front_npivcol || !Front_parent || !Chain_start ||
	!Chain_maxrows || !Chain_maxcols || !Front_1strow ||
	!Front_leftmostdesc)
    {
	error ("out of memory") ;
    }

    status = umfpack_zi_get_symbolic (&nr, &nc, &n1, &anz, &nfr, &nchains,
	Pinit, Qinit, Front_npivcol, Front_parent, Front_1strow,
	Front_leftmostdesc, Chain_start, Chain_maxrows, Chain_maxcols,
	Symbolic) ;

    if (status < 0)
    {
	error ("symbolic factorization invalid") ;
    }

    printf ("From the Symbolic object, C is of dimension %d-by-%d\n", nr, nc);
    printf ("   with nz = %d, number of fronts = %d,\n", nz, nfr) ;
    printf ("   number of frontal matrix chains = %d\n", nchains) ;

    printf ("\nPivot columns in each front, and parent of each front:\n") ;
    k = 0 ;
    for (i = 0 ; i < nfr ; i++)
    {
	fnpiv = Front_npivcol [i] ;
	printf ("    Front %d: parent front: %d number of pivot cols: %d\n",
		i, Front_parent [i], fnpiv) ;
	for (j = 0 ; j < fnpiv ; j++)
	{
	    col = Qinit [k] ;
	    printf (
	    "        %d-th pivot column is column %d in original matrix\n",
		k, col) ;
	    k++ ;
	}
    }

    printf ("\nNote that the column ordering, above, will be refined\n") ;
    printf ("in the numeric factorization below.  The assignment of pivot\n") ;
    printf ("columns to frontal matrices will always remain unchanged.\n") ;

    printf ("\nTotal number of pivot columns in frontal matrices: %d\n", k) ;

    printf ("\nFrontal matrix chains:\n") ;
    for (j = 0 ; j < nchains ; j++)
    {
	printf ("   Frontal matrices %d to %d are factorized in a single\n",
	    Chain_start [j], Chain_start [j+1] - 1) ;
	printf ("        working array of size %d-by-%d\n",
	    Chain_maxrows [j], Chain_maxcols [j]) ;
    }

    /* ---------------------------------------------------------------------- */
    /* numeric factorization of C */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_numeric (Cp, Ci, Cx, Cz, Symbolic, &Numeric,
	Control, Info) ;
    if (status < 0)
    {
	error ("umfpack_zi_numeric failed") ;
    }
    printf ("\nNumeric factorization of C: ") ;
    (void) umfpack_zi_report_numeric (Numeric, Control) ;

    /* ---------------------------------------------------------------------- */
    /* extract the LU factors of C and print them */
    /* ---------------------------------------------------------------------- */

    if (umfpack_zi_get_lunz (&lnz, &unz, &nr, &nc, &nzud, Numeric) < 0)
    {
	error ("umfpack_zi_get_lunz failed") ;
    }
    /* ensure arrays are not of zero size */
    lnz1 = MAX (lnz,1) ;
    unz1 = MAX (unz,1) ;
    Lp = (int *) malloc ((n+1) * sizeof (int)) ;
    Lj = (int *) malloc (lnz1 * sizeof (int)) ;
    Lx = (double *) malloc (lnz1 * sizeof (double)) ;
    Lz = (double *) malloc (lnz1 * sizeof (double)) ;
    Up = (int *) malloc ((n+1) * sizeof (int)) ;
    Ui = (int *) malloc (unz1 * sizeof (int)) ;
    Ux = (double *) malloc (unz1 * sizeof (double)) ;
    Uz = (double *) malloc (unz1 * sizeof (double)) ;
    P = (int *) malloc (n * sizeof (int)) ;
    Q = (int *) malloc (n * sizeof (int)) ;
    Dx = (double *) NULL ;	/* D vector not requested */
    Dz = (double *) NULL ;
    Rs  = (double *) malloc (n * sizeof (double)) ;
    if (!Lp || !Lj || !Lx || !Lz || !Up || !Ui || !Ux || !Uz || !P || !Q || !Rs)
    {
	error ("out of memory") ;
    }
    status = umfpack_zi_get_numeric (Lp, Lj, Lx, Lz, Up, Ui, Ux, Uz,
	P, Q, Dx, Dz, &do_recip, Rs, Numeric) ;
    if (status < 0)
    {
	error ("umfpack_zi_get_numeric failed") ;
    }

    printf ("\nL (lower triangular factor of C): ") ;
    (void) umfpack_zi_report_matrix (n, n, Lp, Lj, Lx, Lz, 0, Control) ;
    printf ("\nU (upper triangular factor of C): ") ;
    (void) umfpack_zi_report_matrix (n, n, Up, Ui, Ux, Uz, 1, Control) ;
    printf ("\nP: ") ;
    (void) umfpack_zi_report_perm (n, P, Control) ;
    printf ("\nQ: ") ;
    (void) umfpack_zi_report_perm (n, Q, Control) ;
    printf ("\nScale factors: row i of A is to be ") ;
    if (do_recip)
    {
	printf ("multiplied by the ith scale factor\n") ;
    }
    else
    {
	printf ("divided by the ith scale factor\n") ;
    }
    for (i = 0 ; i < n ; i++) printf ("%d: %g\n", i, Rs [i]) ;

    /* ---------------------------------------------------------------------- */
    /* convert L to triplet form and print it */
    /* ---------------------------------------------------------------------- */

    /* Note that L is in row-form, so it is the row indices that are created */
    /* by umfpack_zi_col_to_triplet. */

    printf ("\nConverting L to triplet form, and printing it:\n") ;
    Li = (int *) malloc (lnz1 * sizeof (int)) ;
    if (!Li)
    {
	error ("out of memory") ;
    }
    if (umfpack_zi_col_to_triplet (n, Lp, Li) < 0)
    {
	error ("umfpack_zi_col_to_triplet failed") ;
    }
    printf ("\nL, in triplet form: ") ;
    (void) umfpack_zi_report_triplet (n, n, lnz, Li, Lj, Lx, Lz, Control) ;

    /* ---------------------------------------------------------------------- */
    /* save the Numeric object to file, free it, and load it back in */
    /* ---------------------------------------------------------------------- */

    /* use the default filename, "numeric.umf" */
    printf ("\nSaving numeric object:\n") ;
    status = umfpack_zi_save_numeric (Numeric, (char *) NULL) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_save_numeric failed") ;
    }
    printf ("\nFreeing numeric object:\n") ;
    umfpack_zi_free_numeric (&Numeric) ;
    printf ("\nLoading numeric object:\n") ;
    status = umfpack_zi_load_numeric (&Numeric, (char *) NULL) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_load_numeric failed") ;
    }
    printf ("\nDone loading numeric object\n") ;

    /* ---------------------------------------------------------------------- */
    /* solve C'x=b */
    /* ---------------------------------------------------------------------- */

    status = umfpack_zi_solve (UMFPACK_At, Cp, Ci, Cx, Cz, x, xz, b, bz,
	Numeric, Control, Info) ;
    umfpack_zi_report_info (Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_solve failed") ;
    }
    printf ("\nx (solution of C'x=b): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (TRUE, Cp, Ci, Cx, Cz) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* solve C'x=b again, using umfpack_zi_wsolve instead */
    /* ---------------------------------------------------------------------- */

    printf ("\nSolving C'x=b again, using umfpack_zi_wsolve instead:\n") ;
    Wi = (int *) malloc (n * sizeof (int)) ;
    W = (double *) malloc (10*n * sizeof (double)) ;
    if (!Wi || !W)
    {
	error ("out of memory") ;
    }

    status = umfpack_zi_wsolve (UMFPACK_At, Cp, Ci, Cx, Cz, x, xz, b, bz,
	Numeric, Control, Info, Wi, W) ;
    umfpack_zi_report_info (Control, Info) ;
    if (status < 0)
    {
	umfpack_zi_report_status (Control, status) ;
	error ("umfpack_zi_wsolve failed") ;
    }
    printf ("\nx (solution of C'x=b): ") ;
    (void) umfpack_zi_report_vector (n, x, xz, Control) ;
    rnorm = resid (TRUE, Cp, Ci, Cx, Cz) ;
    printf ("maxnorm of residual: %g\n\n", rnorm) ;

    /* ---------------------------------------------------------------------- */
    /* free everything */
    /* ---------------------------------------------------------------------- */

    /* This is not strictly required since the process is exiting and the */
    /* system will reclaim the memory anyway.  It's useful, though, just as */
    /* a list of what is currently malloc'ed by this program.  Plus, it's */
    /* always a good habit to explicitly free whatever you malloc. */

    free (Ap) ;
    free (Ai) ;
    free (Ax) ;
    free (Az) ;

    free (Cp) ;
    free (Ci) ;
    free (Cx) ;
    free (Cz) ;

    free (Pinit) ;
    free (Qinit) ;
    free (Front_npivcol) ;
    free (Front_1strow) ;
    free (Front_leftmostdesc) ;
    free (Front_parent) ;
    free (Chain_start) ;
    free (Chain_maxrows) ;
    free (Chain_maxcols) ;

    free (Lp) ;
    free (Lj) ;
    free (Lx) ;
    free (Lz) ;

    free (Up) ;
    free (Ui) ;
    free (Ux) ;
    free (Uz) ;

    free (P) ;
    free (Q) ;

    free (Li) ;

    free (Wi) ;
    free (W) ;

    umfpack_zi_free_symbolic (&Symbolic) ;
    umfpack_zi_free_numeric (&Numeric) ;

    /* ---------------------------------------------------------------------- */
    /* print the total time spent in this demo */
    /* ---------------------------------------------------------------------- */

    umfpack_toc (t) ;
    printf ("\numfpack_zi_demo complete.\nTotal time: %5.2f seconds"
	" (CPU time), %5.2f seconds (wallclock time)\n", t [1], t [0]) ;
    return (0) ;
}