void SolveSystem ( Vector<double, int>& x_local, const MatrixDense<double, int>& K_local, const Vector<double, int>& b_local, const int numb_node_i, const int* list_node_i, const int* l2i, int numb_node_p, const int* list_node_p, const int* l2p, const int numb_global_node, const int numb_l2g, const int* l2g, const MPI_Comm& mpi_comm ) { // -- number of processors int numb_procs; // -- process number (process rank) int proc_numb; // -- get number of processes MPI_Comm_size( mpi_comm, &numb_procs ); // -- get current process rank MPI_Comm_rank( mpi_comm, &proc_numb ); MatrixDense<double,int> Kii; MatrixDense<double,int> Kip; MatrixDense<double,int> Kpi; MatrixDense<double,int> Kpp; Schur::SplitMatrixToBlock(Kii, Kip, Kpi, Kpp, K_local, list_node_i, numb_node_i, list_node_p, numb_node_p); // Check transpose //CheckTranspose(Kip, Kpi); // Reconstruct K //std::string gmatrix_filename = "../output/cube-125_2/cube-125_g_2.csv"; //ReconstructK(K_local, l2g, numb_global_node, gmatrix_filename, mpi_comm); // LU factorization of Kii MatrixDense<double, int> Kii_lu; Factor::LU(Kii_lu, Kii); MatrixDense<double, int> Uii_inv,Lii_inv; Lii_inv.Allocate(numb_node_i, numb_node_i); Uii_inv.Allocate(numb_node_i, numb_node_i); Vector<double, int> x,rhs; rhs.Allocate(numb_node_i); for(int i = 0;i < numb_node_i;++i) rhs(i) = 0; // invert Lii and Uii for(int i = 0;i < numb_node_i;++i){ if(i > 0) rhs(i - 1) = 0; rhs(i) = 1; DirectSolver::Forward(x, Kii_lu, rhs); for(int j = 0;j < numb_node_i;++j) Lii_inv(j,i) = x(j); DirectSolver::Backward(x,Kii_lu, rhs); for(int j = 0;j < numb_node_i;++j) Uii_inv(j,i) = x(j); } // calculate S_local MatrixDense<double, int> Lpi,Uip,prod; Kpi.MatrixMatrixProduct(Lpi, Uii_inv); Lii_inv.MatrixMatrixProduct(Uip, Kip); Lpi.MatrixMatrixProduct(prod, Uip); MatrixDense<double, int> S_local; Kpp.MatrixMatrixSubstraction(S_local, prod); // merge all numb_node_p in one list int length_list_p[numb_procs]; MPI_Allgather(&numb_node_p, 1, MPI_INT, length_list_p, 1, 
MPI_INT, mpi_comm); /*std::stringstream out_length_list; for(int i = 0;i < numb_procs;++i){ out_length_list << length_list_p[i] << " "; } out_length_list << "\n";*/ int all_list_global_p[numb_procs][numb_global_node]; for(int i = 0;i < numb_procs;++i){ if(proc_numb == i){ for(int j = 0;j < numb_node_p;++j){ all_list_global_p[i][j] = l2g[ list_node_p[j] ]; } } MPI_Bcast(all_list_global_p[i], length_list_p[i], MPI_INT, i, mpi_comm); } /*for(int i = 0;i < numb_procs;++i){ for(int j = 0;j < length_list_p[i];++j){ out_length_list << all_list_global_p[i][j] << " "; } out_length_list << "\n"; } iomrg::printf("%s\n", out_length_list.str().c_str());*/ // create array pos_S : from global position to position in S int pos_S[numb_global_node]; for(int i = 0;i < numb_global_node;++i){ pos_S[i] = -1; } std::vector<int> S_indices; for(int i = 0;i < numb_procs;++i){ for(int j = 0;j < length_list_p[i];++j){ S_indices.push_back(all_list_global_p[i][j]); } } std::sort(S_indices.begin(), S_indices.end()); S_indices.erase(std::unique(S_indices.begin(), S_indices.end()), S_indices.end()); int size_S = S_indices.size(); for(int i = 0;i < size_S;++i){ pos_S[ S_indices[i] ] = i; } // Assemble S MatrixDense<double, int> S; S.Allocate(size_S, size_S); S.Initialize(0); double aux_coef[size_S]; iomrg::printf("size_S = %d\n",size_S); for(int i = 0;i < numb_procs;++i){ int size_local_S = length_list_p[i]; for(int j = 0;j < size_local_S;++j){ if(proc_numb == i){ for(int k = 0;k < size_local_S;++k){ aux_coef[k] = S_local(j,k); } } MPI_Bcast(aux_coef, size_local_S, MPI_DOUBLE, i, mpi_comm); int r = pos_S[ all_list_global_p[i][j] ]; for(int k = 0;k < size_local_S;++k){ S(r, pos_S[ all_list_global_p[i][k] ]) += aux_coef[k]; } } } // Separate b_local Vector<double, int> b_i,b_p; b_i.Allocate(numb_node_i); b_p.Allocate(numb_node_p); for(int i = 0;i < numb_l2g;++i){ if(l2p[i] == -1){ b_i( l2i[i] ) = b_local(i); }else{ b_p( l2p[i] ) = b_local(i); } } // Calculate y_p_local Vector<double, int> z; 
DirectSolver::Forward(z, Kii_lu, b_i); Vector<double, int> aux_prod; Lpi.MatrixVectorProduct(aux_prod, z); Vector<double, int> y_p_local; y_p_local.Allocate(numb_node_p); for(int i = 0;i < numb_node_p;++i){ y_p_local(i) = b_p(i) - aux_prod(i); } // Calculate y_p Vector<double, int> y_p(size_S); for(int i = 0;i < size_S;++i){ aux_coef[i] = 0; } for(int i = 0;i < numb_node_p;++i){ aux_coef[ pos_S[ l2g[ list_node_p[i] ] ] ] += y_p_local(i); } MPI_Allreduce(aux_coef, y_p.GetCoef(), size_S, MPI_DOUBLE, MPI_SUM,mpi_comm); // Calculate x_p Vector<double, int> x_p; DirectSolver::SolveLU(x_p, S, y_p); // Calculate y_i Vector<double, int> aux_prod_2; aux_prod_2.Allocate(numb_node_i); aux_prod_2.Assign(0, numb_node_i - 1, 0); for(int i = 0;i < numb_node_i;++i){ for(int j = 0;j < numb_node_p;++j){ int id_S = pos_S[ l2g[ list_node_p[j] ] ]; aux_prod_2(i) += Uip(i,j) * x_p(id_S); } } Vector<double, int> y_i; y_i.Allocate(numb_node_i); for(int i = 0;i < numb_node_i;++i){ y_i(i) = z(i) - aux_prod_2(i); } // Calculate x_i Vector<double, int> x_i; DirectSolver::Backward(x_i, Kii_lu, y_i); // Assemble x_local for(int i = 0;i < numb_node_i;++i){ x_local(list_node_i[i]) = x_i(i); } for(int i = 0;i < numb_node_p;++i){ x_local(list_node_p[i]) = x_p(i); } }
Vector ADFun<Base>::Forward(
    size_t        p,
    const Vector& x_p,
    std::ostream& s)
{
    // Forward mode sweep of order p.
    //
    // p   : order of the Taylor coefficients being computed.
    // x_p : p-th order Taylor coefficients for the independent variables.
    // s   : stream handed to the sweep routine.
    // Returns the p-th order Taylor coefficients for the dependent
    // variables.
    //
    // NOTE(review): the assertion expressions below are kept verbatim in
    // case the assertion macros report the expression text.

    // n independent variables, m dependent variables
    const size_t n = ind_taddr_.size();
    const size_t m = dep_taddr_.size();

    // Vector has to be a Simple Vector class with elements of type Base
    CheckSimpleVector<Base, Vector>();

    CPPAD_ASSERT_KNOWN(
        size_t(x_p.size()) == n,
        "Second argument to Forward does not have length equal to\n"
        "the dimension of the domain for the corresponding ADFun."
    );
    CPPAD_ASSERT_KNOWN(
        p <= taylor_per_var_,
        "The number of taylor_ coefficient currently stored\n"
        "in this ADFun object is less than p."
    );

    // make sure the taylor_ matrix has a column for order p
    if( taylor_col_dim_ <= p )
    {
        capacity_taylor(p + 1);
    }
    CPPAD_ASSERT_UNKNOWN( taylor_col_dim_ > p );

    // store the order p coefficients of the independent variables
    for(size_t j = 0; j < n; j++)
    {
        CPPAD_ASSERT_UNKNOWN( ind_taddr_[j] < total_num_var_ );

        // ind_taddr_[j] is the operator address of the j-th independent
        // variable ...
        CPPAD_ASSERT_UNKNOWN( play_.GetOp( ind_taddr_[j] ) == InvOp );

        // ... and also its variable address
        const size_t coef_index = ind_taddr_[j] * taylor_col_dim_ + p;
        taylor_[coef_index] = x_p[j];
    }

    // run the sweep; only the zero order sweep reports compare changes
    if( p > 0 )
    {
        forward_sweep(
            s, false, p, n, total_num_var_, &play_,
            taylor_col_dim_, taylor_.data()
        );
    }
    else
    {
# if CPPAD_USE_FORWARD0SWEEP
        compare_change_ = forward0sweep(
            s, true, n, total_num_var_, &play_,
            taylor_col_dim_, taylor_.data()
        );
# else
        compare_change_ = forward_sweep(
            s, true, p, n, total_num_var_, &play_,
            taylor_col_dim_, taylor_.data()
        );
# endif
    }

    // collect the order p coefficients of the dependent variables
    Vector y_p(m);
    for(size_t i = 0; i < m; i++)
    {
        CPPAD_ASSERT_UNKNOWN( dep_taddr_[i] < total_num_var_ );
        const size_t coef_index = dep_taddr_[i] * taylor_col_dim_ + p;
        y_p[i] = taylor_[coef_index];
    }

# ifndef NDEBUG
    // debug builds only: report a nan in the result
    const bool result_has_nan = hasnan(y_p);
    if( result_has_nan && p == 0 )
    {
        CPPAD_ASSERT_KNOWN(false,
            "y = f.Forward(0, x): has a nan in y."
        );
    }
    else if( result_has_nan )
    {
        CPPAD_ASSERT_KNOWN(false,
            "y_p = f.Forward(p, x_p): has a nan in y_p for p > 0, "
            "but not for p = 0."
        );
    }
# endif

    // p + 1 taylor_ coefficients are now stored for every variable
    taylor_per_var_ = p + 1;

    return y_p;
}