// Builds the diffusion stencil for the cuboid mesh: for each mesh entry we
// locate the up-to-six axial neighbours that are also part of the mesh and
// store the corresponding cross-section/length coupling terms.
void CubeMesh::buildStencil()
{
    static const unsigned int flag = EMPTY;

    // fillSpaceToMeshLookup();
    unsigned int num = m2s_.size();
    setStencilSize( num, num );
    for ( unsigned int i = 0; i < num; ++i ) {
        unsigned int q = m2s_[i];
        unsigned int ix = q % nx_;
        unsigned int iy = ( q / nx_ ) % ny_;
        unsigned int iz = ( q / ( nx_ * ny_ ) ) % nz_;
        vector< double > entry;
        vector< unsigned int > colIndex;
        vector< Ecol > e;

        if ( ix > 0 && s2m_[q-1] != flag ) {
            e.push_back( Ecol( dy_ * dz_ / dx_, s2m_[q-1] ) );
        }
        if ( ( ix < nx_ - 1 ) && s2m_[q+1] != flag ) {
            e.push_back( Ecol( dy_ * dz_ / dx_, s2m_[q+1] ) );
        }
        if ( iy > 0 && s2m_[ q-nx_ ] != flag ) {
            assert( q >= nx_ );
            e.push_back( Ecol( dx_ * dz_ / dy_, s2m_[q-nx_] ) );
        }
        if ( iy < ny_ - 1 && s2m_[ q+nx_ ] != flag ) {
            assert( q+nx_ < s2m_.size() );
            e.push_back( Ecol( dx_ * dz_ / dy_, s2m_[q+nx_] ) );
        }
        if ( iz > 0 && s2m_[ q - nx_*ny_ ] != flag ) {
            assert( q >= nx_ * ny_ );
            e.push_back( Ecol( dx_ * dy_ / dz_, s2m_[q - nx_ * ny_] ) );
        }
        if ( iz < nz_ - 1 && s2m_[ q + nx_*ny_ ] != flag ) {
            assert( q + nx_*ny_ < s2m_.size() );
            e.push_back( Ecol( dx_ * dy_ / dz_, s2m_[q + nx_ * ny_] ) );
        }
        // Entries must be sorted by column index before being handed to addRow.
        sort( e.begin(), e.end() );
        for ( vector< Ecol >::iterator j = e.begin(); j != e.end(); ++j ) {
            entry.push_back( j->e_ );
            colIndex.push_back( j->col_ );
        }
        addRow( i, entry, colIndex );
    }
    innerResetStencil();
}
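The stencil rows above rely on a small `Ecol` helper that pairs a coupling coefficient with its column index and supplies the ordering used by `sort`. Its definition is not part of this listing; a minimal sketch consistent with how it is used (public members `e_` and `col_`, ordering assumed to be by column index so that `addRow` receives sorted columns) might look like this:

```cpp
// Hypothetical sketch of the Ecol helper used in buildStencil(); the real
// definition lives elsewhere in the CubeMesh sources. Ordering by column
// index is an assumption inferred from the sorted-column requirement.
class Ecol
{
public:
    Ecol() : e_( 0.0 ), col_( 0 ) {}
    Ecol( double e, unsigned int col ) : e_( e ), col_( col ) {}

    double e_;          // coupling term, e.g. dy*dz/dx for an x-neighbour
    unsigned int col_;  // column (mesh index) of the neighbouring voxel

    bool operator<( const Ecol& other ) const {
        return col_ < other.col_;
    }
};
```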
subdomain_deflation(
        MPI_Comm mpi_comm,
        const Matrix &Astrip,
        const DeflationVectors &def_vec,
        const AMG_params          &amg_params           = AMG_params(),
        const Solver_params       &solver_params        = Solver_params(),
        const DirectSolver_params &direct_solver_params = DirectSolver_params()
        )
    : comm(mpi_comm),
      nrows(backend::rows(Astrip)), ndv(def_vec.dim()), nz(comm.size * ndv),
      dtype( datatype<value_type>::get() ), df( nz ), dx( nz ),
      q( Backend::create_vector(nrows, amg_params.backend) ),
      dd( Backend::create_vector(nz, amg_params.backend) ),
      Z( ndv )
{
    typedef backend::crs<value_type, long> build_matrix;
    typedef typename backend::row_iterator<Matrix>::type       row_iterator1;
    typedef typename backend::row_iterator<build_matrix>::type row_iterator2;

    boost::shared_ptr<build_matrix> aloc = boost::make_shared<build_matrix>();
    boost::shared_ptr<build_matrix> arem = boost::make_shared<build_matrix>();
    boost::shared_ptr<build_matrix> az   = boost::make_shared<build_matrix>();

    // Get sizes of each domain in comm.
    std::vector<long> domain(comm.size + 1, 0);
    MPI_Allgather(&nrows, 1, MPI_LONG, &domain[1], 1, MPI_LONG, comm);
    boost::partial_sum(domain, domain.begin());
    long chunk_start = domain[comm.rank];

    // Fill deflation vectors.
    {
        std::vector<value_type> z(nrows);
        for(int j = 0; j < ndv; ++j) {
            for(long i = 0; i < nrows; ++i) z[i] = def_vec(i, j);
            Z[j] = Backend::copy_vector(z, amg_params.backend);
        }
    }

    // Number of nonzeros in local and remote parts of the matrix.
    long loc_nnz = 0, rem_nnz = 0;

    // Maps remote column numbers to local ids:
    std::map<long, long> rc;
    std::map<long, long>::iterator rc_it = rc.begin();

    // First pass over Astrip rows:
    // 1. Count local and remote nonzeros,
    // 2. Build set of remote columns,
    // 3. Build sparsity pattern of matrix AZ.
    az->nrows = nrows;
    az->ncols = nz;
    az->ptr.resize(nrows + 1, 0);

    std::vector<long> marker(nz, -1);

    for(long i = 0; i < nrows; ++i) {
        for(row_iterator1 a = backend::row_begin(Astrip, i); a; ++a) {
            long c = a.col();

            // Domain the column belongs to
            long d = boost::upper_bound(domain, c) - domain.begin() - 1;

            if (d == comm.rank) {
                ++loc_nnz;
            } else {
                ++rem_nnz;
                rc_it = rc.insert(rc_it, std::make_pair(c, 0));
            }

            if (marker[d] != i) {
                marker[d] = i;
                az->ptr[i + 1] += ndv;
            }
        }
    }

    // Find out:
    // 1. How many columns do we need from each process,
    // 2. What columns do we need from them.
    //
    // Renumber remote columns while at it.
    std::vector<long> num_recv(comm.size, 0);
    std::vector<long> recv_cols;
    recv_cols.reserve(rc.size());
    long id = 0, cur_nbr = 0;
    for(rc_it = rc.begin(); rc_it != rc.end(); ++rc_it) {
        rc_it->second = id++;
        recv_cols.push_back(rc_it->first);

        while(rc_it->first >= domain[cur_nbr + 1]) cur_nbr++;
        num_recv[cur_nbr]++;
    }

    /*** Set up communication pattern. ***/
    // Who sends to whom and how many
    boost::multi_array<long, 2> comm_matrix(
            boost::extents[comm.size][comm.size]
            );

    MPI_Allgather(
            num_recv.data(),    comm.size, MPI_LONG,
            comm_matrix.data(), comm.size, MPI_LONG,
            comm
            );

    long snbr = 0, rnbr = 0, send_size = 0;
    for(int i = 0; i < comm.size; ++i) {
        if (comm_matrix[comm.rank][i]) {
            ++rnbr;
        }

        if (comm_matrix[i][comm.rank]) {
            ++snbr;
            send_size += comm_matrix[i][comm.rank];
        }
    }

    recv.nbr.reserve(rnbr);
    recv.ptr.reserve(rnbr + 1);
    recv.val.resize(rc.size());
    recv.req.resize(rnbr);

    dv = Backend::create_vector( rc.size(), amg_params.backend );

    send.nbr.reserve(snbr);
    send.ptr.reserve(snbr + 1);
    send.val.resize(send_size);
    send.req.resize(snbr);

    std::vector<long> send_col(send_size);

    // Count how many columns to send and to receive.
    recv.ptr.push_back(0);
    send.ptr.push_back(0);
    for(int i = 0; i < comm.size; ++i) {
        if (long nr = comm_matrix[comm.rank][i]) {
            recv.nbr.push_back( i );
            recv.ptr.push_back( recv.ptr.back() + nr );
        }

        if (long ns = comm_matrix[i][comm.rank]) {
            send.nbr.push_back( i );
            send.ptr.push_back( send.ptr.back() + ns );
        }
    }

    // What columns do you need from me?
    for(size_t i = 0; i < send.nbr.size(); ++i)
        MPI_Irecv(&send_col[send.ptr[i]], comm_matrix[send.nbr[i]][comm.rank],
                MPI_LONG, send.nbr[i], tag_exc_cols, comm, &send.req[i]);

    // Here is what I need from you:
    for(size_t i = 0; i < recv.nbr.size(); ++i)
        MPI_Isend(&recv_cols[recv.ptr[i]], comm_matrix[comm.rank][recv.nbr[i]],
                MPI_LONG, recv.nbr[i], tag_exc_cols, comm, &recv.req[i]);

    /* While messages are in flight, */
    // Second pass over Astrip rows:
    // 1. Build local and remote matrix parts.
    // 2. Build local part of AZ matrix.
    aloc->nrows = nrows;
    aloc->ncols = nrows;
    aloc->ptr.reserve(nrows + 1);
    aloc->col.reserve(loc_nnz);
    aloc->val.reserve(loc_nnz);
    aloc->ptr.push_back(0);

    arem->nrows = nrows;
    arem->ncols = rc.size();
    arem->ptr.reserve(nrows + 1);
    arem->col.reserve(rem_nnz);
    arem->val.reserve(rem_nnz);
    arem->ptr.push_back(0);

    boost::partial_sum(az->ptr, az->ptr.begin());
    az->col.resize(az->ptr.back());
    az->val.resize(az->ptr.back());
    boost::fill(marker, -1);

    for(long i = 0; i < nrows; ++i) {
        long az_row_beg = az->ptr[i];
        long az_row_end = az_row_beg;

        for(row_iterator1 a = backend::row_begin(Astrip, i); a; ++a) {
            long       c = a.col();
            value_type v = a.value();

            if ( domain[comm.rank] <= c && c < domain[comm.rank + 1] ) {
                long loc_c = c - chunk_start;
                aloc->col.push_back(loc_c);
                aloc->val.push_back(v);

                for(long j = 0, k = comm.rank * ndv; j < ndv; ++j, ++k) {
                    if (marker[k] < az_row_beg) {
                        marker[k] = az_row_end;
                        az->col[az_row_end] = k;
                        az->val[az_row_end] = v * def_vec(loc_c, j);
                        ++az_row_end;
                    } else {
                        az->val[marker[k]] += v * def_vec(loc_c, j);
                    }
                }
            } else {
                arem->col.push_back(rc[c]);
                arem->val.push_back(v);
            }
        }

        az->ptr[i] = az_row_end;

        aloc->ptr.push_back(aloc->col.size());
        arem->ptr.push_back(arem->col.size());
    }

    /* Finish communication pattern setup. */
    MPI_Waitall(recv.req.size(), recv.req.data(), MPI_STATUSES_IGNORE);
    MPI_Waitall(send.req.size(), send.req.data(), MPI_STATUSES_IGNORE);

    // Shift columns to send to local numbering:
    BOOST_FOREACH(long &c, send_col) c -= chunk_start;

    /* Finish construction of AZ */
    boost::multi_array<value_type, 2> zsend(boost::extents[send.val.size()][ndv]);
    boost::multi_array<value_type, 2> zrecv(boost::extents[recv.val.size()][ndv]);

    // Exchange deflation vectors
    for(size_t i = 0; i < recv.nbr.size(); ++i)
        MPI_Irecv(
                &zrecv[recv.ptr[i]][0], ndv * (recv.ptr[i+1] - recv.ptr[i]),
                dtype, recv.nbr[i], tag_exc_vals, comm, &recv.req[i]);

    for(size_t i = 0; i < send_col.size(); ++i)
        for(long j = 0; j < ndv; ++j)
            zsend[i][j] = def_vec(send_col[i], j);

    for(size_t i = 0; i < send.nbr.size(); ++i)
        MPI_Isend(
                &zsend[send.ptr[i]][0], ndv * (send.ptr[i+1] - send.ptr[i]),
                dtype, send.nbr[i], tag_exc_vals, comm, &send.req[i]);

    MPI_Waitall(recv.req.size(), recv.req.data(), MPI_STATUSES_IGNORE);

    boost::fill(marker, -1);

    for(long i = 0; i < nrows; ++i) {
        long az_row_beg = az->ptr[i];
        long az_row_end = az_row_beg;

        for(row_iterator2 a = backend::row_begin(*arem, i); a; ++a) {
            long       c = a.col();
            value_type v = a.value();

            // Domain the column belongs to
            long d = recv.nbr[boost::upper_bound(recv.ptr, c) - recv.ptr.begin() - 1];

            for(long j = 0, k = d * ndv; j < ndv; ++j, ++k) {
                if (marker[k] < az_row_beg) {
                    marker[k] = az_row_end;
                    az->col[az_row_end] = k;
                    az->val[az_row_end] = v * zrecv[c][j];
                    ++az_row_end;
                } else {
                    az->val[marker[k]] += v * zrecv[c][j];
                }
            }
        }

        az->ptr[i] = az_row_end;
    }

    std::rotate(az->ptr.begin(), az->ptr.end() - 1, az->ptr.end());
    az->ptr.front() = 0;

    MPI_Waitall(send.req.size(), send.req.data(), MPI_STATUSES_IGNORE);

    /* Build deflated matrix E. */
    boost::multi_array<value_type, 2> erow(boost::extents[ndv][nz]);
    std::fill_n(erow.data(), erow.num_elements(), 0);

    for(long i = 0; i < nrows; ++i) {
        for(row_iterator2 a = backend::row_begin(*az, i); a; ++a) {
            long       c = a.col();
            value_type v = a.value();

            for(long j = 0; j < ndv; ++j)
                erow[j][c] += v * def_vec(i, j);
        }
    }

    // Count nonzeros in E.
    std::vector<int> Eptr(nz + 1, 0);
    for(int i = 0; i < comm.size; ++i)
        for(int j = 0; j < comm.size; ++j)
            if (j == i || comm_matrix[i][j])
                for(int k = 0; k < ndv; ++k)
                    Eptr[i * ndv + k + 1] += ndv;

    boost::partial_sum(Eptr, Eptr.begin());

    std::vector<int>        Ecol(Eptr.back());
    std::vector<value_type> Eval(Eptr.back());

    // Fill local strip of E.
    for(int i = 0; i < ndv; ++i) {
        int row_head = Eptr[comm.rank * ndv + i];
        for(int j = 0; j < comm.size; ++j) {
            if (j == comm.rank || comm_matrix[comm.rank][j]) {
                for(int k = 0; k < ndv; ++k) {
                    int c = j * ndv + k;
                    Ecol[row_head] = c;
                    Eval[row_head] = erow[i][c];
                    ++row_head;
                }
            }
        }
    }

    // Exchange strips of E.
    for(int p = 0; p < comm.size; ++p) {
        int ns = Eptr[(p + 1) * ndv] - Eptr[p * ndv];
        MPI_Bcast(&Ecol[Eptr[p * ndv]], ns, MPI_INT, p, comm);
        MPI_Bcast(&Eval[Eptr[p * ndv]], ns, dtype,   p, comm);
    }

    // Prepare E factorization.
    E = boost::make_shared<DirectSolver>(
            boost::tie(nz, Eptr, Ecol, Eval), direct_solver_params
            );

    // Create local AMG preconditioner.
    P = boost::make_shared<AMG>( *aloc, amg_params );

    // Create iterative solver instance.
    solve = boost::make_shared<Solver>(
            nrows, solver_params, amg_params.backend,
            detail::mpi_inner_product(mpi_comm)
            );

    // Move matrices to backend.
    Arem = Backend::copy_matrix(arem, amg_params.backend);
    AZ   = Backend::copy_matrix(az,   amg_params.backend);

    // Columns gatherer. Will retrieve columns to send from backend.
    gather = boost::make_shared<typename Backend::gather>(
            nrows, send_col, amg_params.backend);
}
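The constructor only requires the `DeflationVectors` argument to expose `dim()` (the number of deflation vectors per subdomain) and `operator()(i, j)` (the value of vector `j` at local row `i`), as can be seen from the calls above. A minimal sketch of such a functor, modelling the usual constant (single-vector) deflation space, could look like the following; the struct name is ours, only the two member calls are dictated by the constructor:

```cpp
// Hypothetical example of a deflation-vectors functor accepted by the
// subdomain_deflation constructor above. It models constant deflation:
// a single vector of ones per subdomain.
struct constant_deflation {
    // Number of deflation vectors per subdomain.
    int dim() const { return 1; }

    // Value of deflation vector j at local row i.
    double operator()(long /*i*/, int /*j*/) const {
        return 1.0;
    }
};
```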