Пример #1
// Attempts one adaptive BDF time step on the solution history `x` and then
// rescales the leading step size dt(0).
//
// Visible flow:
//   1. Choose the working order GOrder (Order, or a smaller start-up value
//      derived from t and dt0).
//   2. Build BDF coefficients of order GOrder (BDF1) and, when GOrder > 1,
//      of order GOrder-1 (BDF2), plus the matching history sums RHS1 / RHS2.
//   3. Call FixedPointStep repeatedly while its returned error exceeds `tol`.
//   4. Store the iterate in slot [Order+2] of each component and, when
//      GOrder > 1, form an error estimate `est` from the two BDF orders.
//   5. If est < rel_error, accept: copy the iterate into slot [0], shift the
//      solution and step-size histories, and advance t by dt(0).
//   6. Rescale dt(0) from rel_error/est.
//
// Parameters:
//   Mesh  - supplies Npts through GetNverts(); forwarded to FixedPointStep.
//   Diff  - only forwarded to FixedPointStep here.
//   x     - per-component history; x(c)[0] seeds the iteration, x(c)[1..GOrder]
//           feed the right-hand sides, x(c)[Order+2] receives the new iterate.
//   dt    - step sizes; dt(0) is the step being attempted.
//   Order - maximum BDF order.
//
// Returns: the only visible return statement is `return 0` (i.e. false).
//
// NOTE(review): t, dt0, Nc, c3, dc, tmax and tmin are not declared in this
// block - presumably file-scope variables; confirm.
// NOTE(review): this listing shows no '{' after the signature and several
// blocks are never closed; brace-only lines (and possibly some statements)
// appear to have been lost. Comments below describe the text as it stands.
bool NewtonSolver(const DataStructure& Mesh, const sparse& Diff, tensor& x, vector& dt, int Order) 
	int theta, GOrder, Npts = Mesh.GetNverts(), max_iters = 20;
	// cm(0) later receives x(2)[0].Amax(); presumably (size, fill) constructor - TODO confirm.
	vector cm(1,.0);

	// Start-up ramp: use the full order only once t >= (Order-1)*dt0.
	if(t >= (Order-1) * dt0)
		GOrder = Order;
	else {
		// theta is an int, so the quotient is truncated on assignment.
		theta = (t + dt0)/dt0;
		GOrder = theta;
	// NOTE(review): no closing '}' for the else-block is visible here.

	vector BDF1 = Construct_BDF(GOrder,dt), BDF2(GOrder,.0);

	matrix RHS1(Nc,Npts), RHS2(Nc,Npts);

	// RHS1[j] accumulates -sum_{i=1..GOrder} BDF1(i) * x(j)[i] (history terms only;
	// the i = 0 coefficient is passed to FixedPointStep separately).
	for(int j = 0; j < Nc; j++)
		for(int i = 1; i < GOrder + 1; i++)
			RHS1[j] -= BDF1(i) * x(j)[i];

	// Same construction one order lower; used only for the error estimate below.
	if(GOrder > 1) {
		 BDF2 = Construct_BDF(GOrder-1,dt);

		 for(int j = 0; j < Nc; j++)
			for(int i = 1; i < GOrder; i++)
				 RHS2[j] -= BDF2(i) * x(j)[i];
	// NOTE(review): no closing '}' for the `if(GOrder > 1)` block is visible here.
	// Working copies of slot [0] for components 0..4. Five components are
	// hard-coded here while other loops run to Nc - presumably Nc == 5; confirm.
	vector x1 = x(0)[0], x2 = x(1)[0], x3 = x(2)[0], x4 = x(3)[0], x5 = x(4)[0];

	double error = 1.0, tol = pow(10.0,-6.0), est = .0, abs_error = 0.0001, rel_error = abs_error;
	int iters = 0;
	// Iterate until FixedPointStep reports an error no larger than tol.
	while(error > tol) {
		error = FixedPointStep(Mesh,Diff,RHS1,x1,x2,x3,x4,x5,BDF1(0));
		// NOTE(review): `iters` is never incremented in the visible text and this
		// `if` has no visible body - presumably an increment, a `break` and the
		// loop's closing '}' were lost. As written, the next statement would
		// become the body of this `if`.
		if(iters > max_iters)
	cout << "FPI iters = " << iters << endl;
	// Park the iterate in scratch slot [Order+2]; slot [0] is untouched until
	// the step is accepted below.
	x(0)[Order+2] = x1; x(1)[Order+2] = x2; x(2)[Order+2] = x3; x(3)[Order+2] = x4; 
	x(4)[Order+2] = x5;
	// Error estimate; skipped for GOrder == 1 or when the iteration limit was
	// exceeded, in which case est keeps its initial value 0.
	if(GOrder > 1 && iters <= max_iters) {
		vector err(Nc*Npts,.0);
		matrix deriv1 = RHS1, deriv2 = RHS2;
		// Complete each BDF sum with its i = 0 term applied to the new iterate.
		for(int i = 0; i < Nc; i++) {
			deriv1[i] -= BDF1(0) * x(i)[Order+2];
			deriv2[i] -= BDF2(0) * x(i)[Order+2];
		// NOTE(review): no closing '}' for the loop above is visible here.
		// err holds |deriv1| - |deriv2| per entry, only where |deriv1| > 1e-8;
		// other entries keep their initial 0.
		for(int i = 0; i < Nc; i++)
			for(int j = 0; j < Npts; j++)
				if(fabs(deriv1[i](j)) > pow(10.0,-8.0))
					err(i*Npts+j) = (fabs(deriv1[i](j)) - fabs(deriv2[i](j))); // / fabs(deriv1[i](j));

		est = err.Norm(); double deno = .0;
		// Only `deno += ...` belongs to the brace-less loop; the division on the
		// same line runs once, after the loop.
		for(int i = 0; i < Nc; i++)
			deno += deriv1[i].Norm(); est = est / deno; 

	// On non-convergence force est above rel_error so the acceptance test fails.
	if(iters > max_iters) {
		cout << "Failure to converge with attempted timestep = " << dt(0) << endl;
		est = rel_error * 2.0; }

	// Accept the step.
	if( est < rel_error) {

		// Slot [0] takes the new iterate first ...
		for(int i = 0; i < Nc; i++)
			x(i)[0] = x(i)[Order+2];

		// ... then slots are shifted up from the back, so slot [1] also ends up
		// holding the new iterate and the previous slot [Order] is dropped.
		for(int i = 0; i < Nc; i++)
			for(int j = Order; j > 0; j--)
				x(i)[j] = x(i)[j-1];

		t += dt(0);
		// NOTE(review): `time` is not used anywhere in the visible text.
		vector time(1,dt(0));
		// Amax() is presumably the largest-magnitude entry - TODO confirm.
		cm(0) = x(2)[0].Amax();


		// Shift the step-size history; dt(0) is rescaled below.
		for(int i = Order - 1; i > 0; i--)
			dt(i) = dt(i-1);

		cout << "Newton Adaptive BDF Solver Converged with c_max = " << cm << " at time " << t << endl; 

	// Step-size update: 0.8 * dt(0) * (rel_error/est)^(1/GOrder), limited by
	// tmax while cm(0) < c3 - dc and by tmin once cm(0) >= c3 - dc.
	// NOTE(review): the nesting of the second `if` cannot be determined from
	// this text; also confirm that min(tmin, ...) rather than max is intended.
	if(GOrder > 1 && cm(0) < (c3 - dc))
		dt(0) = min(tmax,0.8 * dt(0) *  pow(rel_error/est,1.0/(GOrder)));
		if(cm(0) >= (c3 - dc))
			dt(0) = min(tmin,0.8 * dt(0) * pow(rel_error/est,1.0/GOrder));
	// Returns 0 (false) whether or not the step was accepted.
	return 0;
Пример #2
// Builds the factor matrix H_ from A_ one row at a time.
//
// Visible flow:
//   - Row 0: the (thresholded) diagonal entry is square-rooted and inserted.
//   - Rows 1..NumMyRows_-1: the row's lower-triangular entries are loaded into
//     a hash table, each h_ij is updated against the already-stored row col_j
//     of H_ and divided by h_jj, entries not exceeding DropTolerance_ are not
//     stored, at most about LOF of the largest remaining entries are kept via
//     an nth_element cutoff, and the diagonal h_ii is reduced by the squared
//     off-diagonals before its square root is taken.
//   - Finally the nonzero and flop counters are summed over the communicator
//     and IsComputed_ is set.
// This presumably implements a threshold-based incomplete Cholesky
// factorization (going by the class name) - confirm against the class docs.
//
// NOTE(review): this listing shows no '{' after the signature, no closing
// braces for most blocks, no visible row-extraction calls, and no return
// statement although the function returns int; lines appear to have been
// lost. Comments below describe the text as it stands.
int Ifpack_ICT::Compute() 
  // NOTE(review): no body is visible for this `if` - presumably a call that
  // performs the initialization was lost.
  if (!IsInitialized()) 

  IsComputed_ = false;

  NumMyRows_ = A_.NumMyRows();
  // Scratch buffers sized for the longest row of A_.
  int Length = A_.MaxNumEntries();
  vector<int>    RowIndices(Length);
  vector<double> RowValues(Length);

  bool distributed = (Comm().NumProc() > 1)?true:false;

  // With more than one process, H_ is built on a freshly created serial
  // communicator and a map of NumMyRows_ elements with index base 0.
  if (distributed)
    SerialComm_ = Teuchos::rcp(new Epetra_SerialComm);
    SerialMap_ = Teuchos::rcp(new Epetra_Map(NumMyRows_, 0, *SerialComm_));
    assert (SerialComm_.get() != 0);
    assert (SerialMap_.get() != 0);
    // NOTE(review): presumably the `else` branch (reuse A_'s row map without
    // taking ownership); the `else` line itself is not visible.
    SerialMap_ = Teuchos::rcp(const_cast<Epetra_Map*>(&A_.RowMatrixRowMap()), false);

  int RowNnz;
  // Local flop counter; incremented in the inner update loop and summed over
  // processes at the end.
  double flops = 0.0;

  H_ = Teuchos::rcp(new Epetra_CrsMatrix(Copy,*SerialMap_,0));
  if (H_.get() == 0)
    IFPACK_CHK_ERR(-5); // memory allocation error

  // get A(0,0) element and insert it (after sqrt)
  // NOTE(review): the call that fills RowNnz / RowIndices / RowValues for
  // row 0 is not visible; as written RowNnz is read uninitialized below.

  // skip off-processor elements
  // (compacts the entries whose column index is < NumMyRows_ to the front)
  if (distributed)
    int count = 0;
    for (int i = 0 ;i < RowNnz ; ++i) 
      if (RowIndices[i] < NumMyRows_){
        RowIndices[count] = RowIndices[i];
        RowValues[count] = RowValues[i];
        // NOTE(review): no increment of `count` is visible - presumably lost.
    RowNnz = count;

  // modify diagonal
  // diag_val = AbsoluteThreshold() * sign(v) + RelativeThreshold() * v for the
  // entry in column 0; stays 0.0 if no such entry is found.
  double diag_val = 0.0;
  for (int i = 0 ;i < RowNnz ; ++i) {
    if (RowIndices[i] == 0) {
      double& v = RowValues[i];
      diag_val = AbsoluteThreshold() * EPETRA_SGN(v) +
        RelativeThreshold() * v;

  diag_val = sqrt(diag_val);
  int diag_idx = 0;
  EPETRA_CHK_ERR(H_->InsertGlobalValues(0,1,&diag_val, &diag_idx));

  // The 10 is just a small constant to limit collisions as the actual keys
  // we store are the indices and not integers
  // [0..A_.MaxNumEntries()*LevelofFill()].
  Ifpack_HashTable Hash( 10 * A_.MaxNumEntries() * LevelOfFill(), 1);

  // start factorization for line 1
  for (int row_i = 1 ; row_i < NumMyRows_ ; ++row_i) {

    // get row `row_i' of the matrix
    // NOTE(review): the extraction call for row_i is not visible here.

    // skip off-processor elements
    if (distributed)
      int count = 0;
      for (int i = 0 ;i < RowNnz ; ++i) 
        if (RowIndices[i] < NumMyRows_){
          RowIndices[count] = RowIndices[i];
          RowValues[count] = RowValues[i];
          // NOTE(review): no increment of `count` is visible - presumably lost.
      RowNnz = count;

    // number of nonzeros in this row are defined as the nonzeros
    // of the matrix, plus the level of fill 
    // (as coded: LevelOfFill() * RowNnz truncated to int, with a minimum of 1)
    int LOF = (int)(LevelOfFill() * RowNnz);
    if (LOF == 0) LOF = 1;

    // convert line `row_i' into hash for fast access
    // NOTE(review): no clearing of `Hash` between rows is visible - confirm
    // whether a reset call was lost here.

    // The diagonal entry gets the same threshold treatment as row 0; entries
    // left of the diagonal go into the hash; entries right of it are ignored.
    double h_ii = 0.0;
    for (int i = 0 ; i < RowNnz ; ++i) {
      if (RowIndices[i] == row_i) {
        double& v = RowValues[i];
        h_ii = AbsoluteThreshold() * EPETRA_SGN(v) + RelativeThreshold() * v;
      else if (RowIndices[i] < row_i)
        Hash.set(RowIndices[i], RowValues[i], true);
    // form element (row_i, col_j)
    // I start from the first row that has a nonzero column
    // index in row_i.
    // assumes RowIndices[0] is the smallest column index of the row - TODO confirm
    for (int col_j = RowIndices[0] ; col_j < row_i ; ++col_j) {

      double h_ij = 0.0, h_jj = 0.0;
      // note: get() returns 0.0 if col_j is not found
      h_ij = Hash.get(col_j);

      // get pointers to row `col_j'
      int* ColIndices;
      double* ColValues;
      int ColNnz;
      H_->ExtractGlobalRowView(col_j, ColNnz, ColValues, ColIndices);

      // Pick up the stored diagonal h_jj and subtract the products of the
      // stored row col_j with the matching entries of the current row.
      for (int k = 0 ; k < ColNnz ; ++k) {
        int col_k = ColIndices[k];

        if (col_k == col_j)
          h_jj = ColValues[k];
        else {
          double xxx = Hash.get(col_k);
          if (xxx != 0.0)
            h_ij -= ColValues[k] * xxx;
            flops += 2.0;

      // NOTE(review): h_jj stays 0.0 if row col_j has no diagonal entry,
      // which would make this a division by zero.
      h_ij /= h_jj;

      // Only values above the drop tolerance are written back to the hash.
      if (IFPACK_ABS(h_ij) > DropTolerance_)
        Hash.set(col_j, h_ij);
      // only approx
      // NOTE(review): `flops` is a running total for the whole call and is
      // also added to ComputeFlops_ (via TotalFlops) at the end - check for
      // double counting.
      ComputeFlops_ += 2.0 * flops + 1.0;

    int size = Hash.getNumEntries();

    vector<double> AbsRow(size);
    int count = 0;
    // +1 because I use the extra position for diagonal in insert
    vector<int> keys(size + 1);
    vector<double> values(size + 1);

    // Dump the hash contents into the parallel keys/values arrays.
    Hash.arrayify(&keys[0], &values[0]);

    for (int i = 0 ; i < size ; ++i)
      AbsRow[i] = IFPACK_ABS(values[i]);
    count = size;

    // When the row holds more than LOF entries, use the element that
    // nth_element places at position LOF as the magnitude cutoff.
    double cutoff = 0.0;
    if (count > LOF) {
      // NOTE(review): this call is truncated - its last argument (presumably
      // a comparator) and the closing ");" are not visible.
      nth_element(AbsRow.begin(), AbsRow.begin() + LOF, AbsRow.begin() + count, 

      cutoff = AbsRow[LOF];

    // The diagonal is reduced by the squares of all off-diagonal values,
    // including those about to be discarded below.
    for (int i = 0 ; i < size ; ++i)
      h_ii -= values[i] * values[i];

    // A negative pivot is replaced by a small positive value before the sqrt.
    if (h_ii < 0.0) h_ii = 1e-12;;

    h_ii = sqrt(h_ii);

    // only approx, + 1 == sqrt
    ComputeFlops_ += 2 * size + 1;

    double DiscardedElements = 0.0;

    // Compact the entries whose magnitude exceeds the cutoff to the front.
    // NOTE(review): braces, the increment of `count` and an `else` before the
    // DiscardedElements line are presumably missing from this listing.
    count = 0;
    for (int i = 0 ; i < size ; ++i)    
      if (IFPACK_ABS(values[i]) > cutoff)
        values[count] = values[i];
        keys[count] = keys[i];
        DiscardedElements += values[i];

    // Optionally add the relaxed sum of discarded values to the diagonal.
    if (RelaxValue() != 0.0) {
      DiscardedElements *= RelaxValue();
      h_ii += DiscardedElements;

    // The diagonal goes into the extra slot after the kept entries.
    values[count] = h_ii;
    keys[count] = row_i;

    // NOTE(review): `count` as passed here does not include the diagonal slot
    // written just above - presumably an increment was lost; confirm.
    H_->InsertGlobalValues(row_i, count, &(values[0]), (int*)&(keys[0]));


  // NOTE(review): no matching #endif is visible for this #if 0; as written
  // everything below it would be excluded from compilation.
#if 0
  // to check the complete factorization
  Epetra_Vector LHS(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS1(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS2(Matrix().RowMatrixRowMap());
  Epetra_Vector RHS3(Matrix().RowMatrixRowMap());


  RHS1.Update(-1.0, RHS3, 1.0);
  cout << endl;
  cout << RHS1;
  // Sum H_'s nonzero count over Comm() into GlobalNonzeros_.
  // NOTE(review): the local is named MyNonzeros but is filled from
  // NumGlobalNonzeros() - confirm this is the intended accessor.
  int MyNonzeros = H_->NumGlobalNonzeros();
  Comm().SumAll(&MyNonzeros, &GlobalNonzeros_, 1);

  IsComputed_ = true;
  double TotalFlops; // sum across all the processors
  A_.Comm().SumAll(&flops, &TotalFlops, 1);
  ComputeFlops_ += TotalFlops;
  // NOTE(review): no reset/start of Time_ is visible earlier in this function.
  ComputeTime_ += Time_.ElapsedTime();

