Beispiel #1
0
void SolveSystem (
        Vector<double, int>& x_local,
        const MatrixDense<double, int>& K_local,
        const Vector<double, int>& b_local,
        const int numb_node_i,
        const int* list_node_i,
        const int* l2i,
        int numb_node_p,
        const int* list_node_p,
        const int* l2p,
        const int numb_global_node,
        const int numb_l2g,
        const int* l2g,
        const MPI_Comm& mpi_comm ) {
  // -- number of processors
  int numb_procs;
  // -- process number (process rank)
  int proc_numb;

  // -- get number of processes
  MPI_Comm_size( mpi_comm, &numb_procs );
  // -- get current process rank
  MPI_Comm_rank( mpi_comm, &proc_numb );


  MatrixDense<double,int> Kii;
  MatrixDense<double,int> Kip;
  MatrixDense<double,int> Kpi;
  MatrixDense<double,int> Kpp;

  Schur::SplitMatrixToBlock(Kii, Kip, Kpi, Kpp, K_local,
                            list_node_i, numb_node_i,
                            list_node_p, numb_node_p);

  // Check transpose
  //CheckTranspose(Kip, Kpi);

  // Reconstruct K
  //std::string gmatrix_filename = "../output/cube-125_2/cube-125_g_2.csv";
  //ReconstructK(K_local, l2g, numb_global_node, gmatrix_filename, mpi_comm);

  // LU factorization of Kii
  MatrixDense<double, int> Kii_lu;
  Factor::LU(Kii_lu, Kii);

  MatrixDense<double, int> Uii_inv,Lii_inv;
  Lii_inv.Allocate(numb_node_i, numb_node_i);
  Uii_inv.Allocate(numb_node_i, numb_node_i);

  Vector<double, int> x,rhs;
  rhs.Allocate(numb_node_i);

  for(int i = 0;i < numb_node_i;++i)
    rhs(i) = 0;

  // invert Lii and Uii
  for(int i = 0;i < numb_node_i;++i){
    if(i > 0) rhs(i - 1) = 0;
    rhs(i) = 1;
    DirectSolver::Forward(x, Kii_lu, rhs);

    for(int j = 0;j < numb_node_i;++j)
      Lii_inv(j,i) = x(j);

    DirectSolver::Backward(x,Kii_lu, rhs);

    for(int j = 0;j < numb_node_i;++j)
      Uii_inv(j,i) = x(j);
  }

  // calculate S_local
  MatrixDense<double, int> Lpi,Uip,prod;
  Kpi.MatrixMatrixProduct(Lpi, Uii_inv);
  Lii_inv.MatrixMatrixProduct(Uip, Kip);
  Lpi.MatrixMatrixProduct(prod, Uip);

  MatrixDense<double, int> S_local;
  Kpp.MatrixMatrixSubstraction(S_local, prod);

  // merge all numb_node_p in one list
  int length_list_p[numb_procs];

  MPI_Allgather(&numb_node_p, 1, MPI_INT, length_list_p, 1, MPI_INT, mpi_comm);

  /*std::stringstream out_length_list;

  for(int i = 0;i < numb_procs;++i){
    out_length_list << length_list_p[i] << " ";
  }
  out_length_list << "\n";*/

  int all_list_global_p[numb_procs][numb_global_node];

  for(int i = 0;i < numb_procs;++i){
    if(proc_numb == i){
      for(int j = 0;j < numb_node_p;++j){
        all_list_global_p[i][j] = l2g[ list_node_p[j] ];
      }
    }

    MPI_Bcast(all_list_global_p[i], length_list_p[i], MPI_INT, i, mpi_comm);
  }

  /*for(int i = 0;i < numb_procs;++i){
    for(int j = 0;j < length_list_p[i];++j){
      out_length_list << all_list_global_p[i][j] << " ";
    }

    out_length_list << "\n";
  }

  iomrg::printf("%s\n", out_length_list.str().c_str());*/

  // create array pos_S : from global position to position in S
  int pos_S[numb_global_node];

  for(int i = 0;i < numb_global_node;++i){
    pos_S[i] = -1;
  }

  std::vector<int> S_indices;

  for(int i = 0;i < numb_procs;++i){
    for(int j = 0;j < length_list_p[i];++j){
      S_indices.push_back(all_list_global_p[i][j]);
    }
  }

  std::sort(S_indices.begin(), S_indices.end());
  S_indices.erase(std::unique(S_indices.begin(), S_indices.end()), S_indices.end());
  int size_S = S_indices.size();

  for(int i = 0;i < size_S;++i){
    pos_S[ S_indices[i] ] = i;
  }

  // Assemble S
  MatrixDense<double, int> S;
  S.Allocate(size_S, size_S);
  S.Initialize(0);

  double aux_coef[size_S];

  iomrg::printf("size_S = %d\n",size_S);

  for(int i = 0;i < numb_procs;++i){
    int size_local_S = length_list_p[i];

    for(int j = 0;j < size_local_S;++j){
      if(proc_numb == i){
        for(int k = 0;k < size_local_S;++k){
          aux_coef[k] = S_local(j,k);
        }
      }

      MPI_Bcast(aux_coef, size_local_S, MPI_DOUBLE, i, mpi_comm);

      int r = pos_S[ all_list_global_p[i][j] ];

      for(int k = 0;k < size_local_S;++k){
        S(r, pos_S[ all_list_global_p[i][k] ]) += aux_coef[k];
      }
    }
  }

  // Separate b_local
  Vector<double, int> b_i,b_p;
  b_i.Allocate(numb_node_i);
  b_p.Allocate(numb_node_p);

  for(int i = 0;i < numb_l2g;++i){
    if(l2p[i] == -1){
      b_i( l2i[i] ) = b_local(i);
    }else{
      b_p( l2p[i] ) = b_local(i);
    }
  }

  // Calculate y_p_local
  Vector<double, int> z;
  DirectSolver::Forward(z, Kii_lu, b_i);

  Vector<double, int> aux_prod;
  Lpi.MatrixVectorProduct(aux_prod, z);
  Vector<double, int> y_p_local;
  y_p_local.Allocate(numb_node_p);

  for(int i = 0;i < numb_node_p;++i){
    y_p_local(i) = b_p(i) - aux_prod(i);
  }

  // Calculate y_p
  Vector<double, int> y_p(size_S);

  for(int i = 0;i < size_S;++i){
    aux_coef[i] = 0;
  }

  for(int i = 0;i < numb_node_p;++i){
    aux_coef[ pos_S[ l2g[ list_node_p[i] ] ] ] += y_p_local(i);
  }

  MPI_Allreduce(aux_coef, y_p.GetCoef(), size_S, MPI_DOUBLE,  MPI_SUM,mpi_comm);

  // Calculate x_p
  Vector<double, int> x_p;
  DirectSolver::SolveLU(x_p, S, y_p);

  // Calculate y_i
  Vector<double, int> aux_prod_2;
  aux_prod_2.Allocate(numb_node_i);
  aux_prod_2.Assign(0, numb_node_i - 1, 0);

  for(int i = 0;i < numb_node_i;++i){
    for(int j = 0;j < numb_node_p;++j){
      int id_S = pos_S[ l2g[ list_node_p[j] ] ];
      aux_prod_2(i) += Uip(i,j) * x_p(id_S);
    }
  }

  Vector<double, int> y_i;
  y_i.Allocate(numb_node_i);

  for(int i = 0;i < numb_node_i;++i){
    y_i(i) = z(i) - aux_prod_2(i);
  }

  // Calculate x_i
  Vector<double, int> x_i;
  DirectSolver::Backward(x_i, Kii_lu, y_i);

  // Assemble x_local
  for(int i = 0;i < numb_node_i;++i){
    x_local(list_node_i[i]) = x_i(i);
  }

  for(int i = 0;i < numb_node_p;++i){
    x_local(list_node_p[i]) = x_p(i);
  }
}
Beispiel #2
0
Vector ADFun<Base>::Forward(
	size_t p                    , 
	const Vector& x_p           , 
	std::ostream& s             )
{	// temporary indices
	size_t i, j;

	// number of independent variables
	size_t n = ind_taddr_.size();

	// number of dependent variables
	size_t m = dep_taddr_.size();

	// check Vector is Simple Vector class with Base type elements
	CheckSimpleVector<Base, Vector>();

	CPPAD_ASSERT_KNOWN(
		size_t(x_p.size()) == n,
		"Second argument to Forward does not have length equal to\n"
		"the dimension of the domain for the corresponding ADFun."
	);
	CPPAD_ASSERT_KNOWN(
		p <= taylor_per_var_,
		"The number of taylor_ coefficient currently stored\n"
		"in this ADFun object is less than p."
	);  

	// check if the taylor_ matrix needs more columns
	if( taylor_col_dim_ <= p )
		capacity_taylor(p + 1);
	CPPAD_ASSERT_UNKNOWN( taylor_col_dim_ > p );

	// set the p-th order taylor_ coefficients for independent variables
	for(j = 0; j < n; j++)
	{	CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] < total_num_var_ );

		// ind_taddr_[j] is operator taddr for j-th independent variable
		CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[j] ) == InvOp );

		// It is also variable taddr for j-th independent variable
		taylor_[ind_taddr_[j] * taylor_col_dim_ + p] = x_p[j];
	}

	// evaluate the derivatives
	if( p == 0 )
	{
# if CPPAD_USE_FORWARD0SWEEP
		compare_change_ = forward0sweep(s, true,
			n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
		);
# else
		compare_change_ = forward_sweep(s, true,
			p, n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
		);
# endif
	}
	else forward_sweep(s, false,
		p, n, total_num_var_, &play_, taylor_col_dim_, taylor_.data()
	);

	// return the p-th order taylor_ coefficients for dependent variables
	Vector y_p(m);
	for(i = 0; i < m; i++)
	{	CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < total_num_var_ );
		y_p[i] = taylor_[dep_taddr_[i] * taylor_col_dim_ + p];
	}
# ifndef NDEBUG
	if( hasnan(y_p) )
	{	if( p == 0 )
		{	CPPAD_ASSERT_KNOWN(false,
				"y = f.Forward(0, x): has a nan in y."
			);  
		}
		else
		{	CPPAD_ASSERT_KNOWN(false,
				"y_p = f.Forward(p, x_p): has a nan in y_p for p > 0, "
				"but not for p = 0."
			);
		}
	}
# endif


	// now we have p + 1  taylor_ coefficients per variable
	taylor_per_var_ = p + 1;

	return y_p;
}