Example #1
0
void CubeMesh::buildStencil()
{
	static const unsigned int flag = EMPTY;


	// fillSpaceToMeshLookup();
	unsigned int num = m2s_.size();
	setStencilSize( num, num );
	for ( unsigned int i = 0; i < num; ++i ) {
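		// Map mesh entry i to its space index q, then back to (ix, iy, iz)
		// coordinates on the cuboid grid.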
		unsigned int q = m2s_[i];
		unsigned int ix = q % nx_;
		unsigned int iy = ( q / nx_ ) % ny_;
		unsigned int iz = ( q / ( nx_ * ny_ ) ) % nz_;
		vector< double > entry;
		vector< unsigned int > colIndex;
		vector< Ecol > e;

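		// For each of the six axis-aligned neighbours that lies inside the
		// cuboid and maps to a valid mesh entry, add a coupling term equal to
		// the shared face area divided by the spacing along that axis.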
		if ( ix > 0 && s2m_[q-1] != flag ) {
			e.push_back( Ecol( dy_ * dz_ / dx_, s2m_[q-1] ) );
		}
		if ( ( ix < nx_ - 1 ) && s2m_[q+1] != flag ) {
			e.push_back( Ecol( dy_ * dz_ / dx_, s2m_[q+1] ) );
		}
		if ( iy > 0 && s2m_[ q-nx_ ] != flag ) {
			assert( q >= nx_ );
			e.push_back( Ecol( dx_ * dz_ / dy_, s2m_[q-nx_] ) );
		}
		if ( iy < ny_ - 1 && s2m_[ q+nx_ ] != flag ) {
			assert( q+nx_ < s2m_.size() );
			e.push_back( Ecol( dx_ * dz_ / dy_, s2m_[q+nx_] ) );
		}
		if ( iz > 0 && s2m_[ q - nx_*ny_ ] != flag ) {
			assert( q >= nx_ * ny_ );
			e.push_back( Ecol( dx_ * dy_ / dz_, s2m_[q - nx_ * ny_] ) );
		}
		if ( iz < nz_ - 1 && s2m_[ q + nx_*ny_ ] != flag ) {
			assert( q + nx_*ny_ < s2m_.size() );
			e.push_back( Ecol( dx_ * dy_ / dz_, s2m_[q + nx_ * ny_] ) );
		}
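		// Sort the entries, then split them into the value and column-index
		// arrays expected by addRow().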
		sort( e.begin(), e.end() );
		for ( vector< Ecol >::iterator j = e.begin(); j != e.end(); ++j ) {
			entry.push_back( j->e_ );
			colIndex.push_back( j->col_ );
		}
		addRow( i, entry, colIndex );
	}
	innerResetStencil();
}
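
The stencil rows above are assembled from a small helper type, Ecol, whose
definition is not part of this example. Judging from the usage (the
two-argument constructor, the e_ and col_ members, and the call to sort()),
a minimal sketch of what buildStencil() assumes could look like the
following; ordering by column index is an assumption chosen so that
addRow() receives its columns in ascending order.

class Ecol
{
	public:
		Ecol( double e, unsigned int col )
			: e_( e ), col_( col )
		{;}

		double e_;          // coupling coefficient: shared face area / spacing
		unsigned int col_;  // column index of the neighbouring mesh entry

		// Assumed ordering: compare by column so that sorted rows come out
		// in ascending column order.
		bool operator<( const Ecol& other ) const
		{
			return col_ < other.col_;
		}
};
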
Example #2
0
        subdomain_deflation(
                MPI_Comm mpi_comm,
                const Matrix &Astrip,
                const DeflationVectors &def_vec,
                const AMG_params          &amg_params           = AMG_params(),
                const Solver_params       &solver_params        = Solver_params(),
                const DirectSolver_params &direct_solver_params = DirectSolver_params()
                )
        : comm(mpi_comm),
          nrows(backend::rows(Astrip)), ndv(def_vec.dim()), nz(comm.size * ndv),
          dtype( datatype<value_type>::get() ), df( nz ), dx( nz ),
          q( Backend::create_vector(nrows, amg_params.backend) ),
          dd( Backend::create_vector(nz, amg_params.backend) ),
          Z( ndv )
        {
            typedef backend::crs<value_type, long>                     build_matrix;
            typedef typename backend::row_iterator<Matrix>::type       row_iterator1;
            typedef typename backend::row_iterator<build_matrix>::type row_iterator2;

            boost::shared_ptr<build_matrix> aloc = boost::make_shared<build_matrix>();
            boost::shared_ptr<build_matrix> arem = boost::make_shared<build_matrix>();
            boost::shared_ptr<build_matrix> az   = boost::make_shared<build_matrix>();

            // Get the size of each domain in comm and compute the domain boundaries.
            std::vector<long> domain(comm.size + 1, 0);
            MPI_Allgather(&nrows, 1, MPI_LONG, &domain[1], 1, MPI_LONG, comm);
            boost::partial_sum(domain, domain.begin());
            long chunk_start = domain[comm.rank];

            // Fill deflation vectors.
            {
                std::vector<value_type> z(nrows);
                for(int j = 0; j < ndv; ++j) {
                    for(long i = 0; i < nrows; ++i)
                        z[i] = def_vec(i, j);
                    Z[j] = Backend::copy_vector(z, amg_params.backend);
                }
            }

            // Number of nonzeros in local and remote parts of the matrix.
            long loc_nnz = 0, rem_nnz = 0;

            // Maps remote column numbers to local ids:
            std::map<long, long> rc;
            std::map<long, long>::iterator rc_it = rc.begin();

            // First pass over Astrip rows:
            // 1. Count local and remote nonzeros,
            // 2. Build set of remote columns,
            // 3. Build sparsity pattern of matrix AZ.
            az->nrows = nrows;
            az->ncols = nz;
            az->ptr.resize(nrows + 1, 0);

            std::vector<long> marker(nz, -1);

            for(long i = 0; i < nrows; ++i) {
                for(row_iterator1 a = backend::row_begin(Astrip, i); a; ++a) {
                    long c = a.col();

                    // Domain the column belongs to
                    long d = boost::upper_bound(domain, c) - domain.begin() - 1;

                    if (d == comm.rank) {
                        ++loc_nnz;
                    } else {
                        ++rem_nnz;
                        rc_it = rc.insert(rc_it, std::make_pair(c, 0));
                    }

                    if (marker[d] != i) {
                        marker[d] = i;
                        az->ptr[i + 1] += ndv;
                    }
                }
            }

            // Find out:
            // 1. How many columns do we need from each process,
            // 2. What columns do we need from them.
            //
            // Renumber remote columns while at it.
            std::vector<long> num_recv(comm.size, 0);
            std::vector<long> recv_cols;
            recv_cols.reserve(rc.size());
            long id = 0, cur_nbr = 0;
            for(rc_it = rc.begin(); rc_it != rc.end(); ++rc_it) {
                rc_it->second = id++;
                recv_cols.push_back(rc_it->first);

                while(rc_it->first >= domain[cur_nbr + 1]) cur_nbr++;
                num_recv[cur_nbr]++;
            }

            /*** Set up communication pattern. ***/
            // Who sends to whom and how many
            boost::multi_array<long, 2> comm_matrix(
                    boost::extents[comm.size][comm.size]
                    );

            MPI_Allgather(
                    num_recv.data(),    comm.size, MPI_LONG,
                    comm_matrix.data(), comm.size, MPI_LONG,
                    comm
                    );

            long snbr = 0, rnbr = 0, send_size = 0;
            for(int i = 0; i < comm.size; ++i) {
                if (comm_matrix[comm.rank][i]) {
                    ++rnbr;
                }

                if (comm_matrix[i][comm.rank]) {
                    ++snbr;
                    send_size += comm_matrix[i][comm.rank];
                }
            }

            recv.nbr.reserve(rnbr);
            recv.ptr.reserve(rnbr + 1);
            recv.val.resize(rc.size());
            recv.req.resize(rnbr);

            dv = Backend::create_vector( rc.size(), amg_params.backend );

            send.nbr.reserve(snbr);
            send.ptr.reserve(snbr + 1);
            send.val.resize(send_size);
            send.req.resize(snbr);

            std::vector<long> send_col(send_size);

            // Build the neighbour lists and the offsets of columns to send and to receive.
            recv.ptr.push_back(0);
            send.ptr.push_back(0);
            for(int i = 0; i < comm.size; ++i) {
                if (long nr = comm_matrix[comm.rank][i]) {
                    recv.nbr.push_back( i );
                    recv.ptr.push_back( recv.ptr.back() + nr );
                }

                if (long ns = comm_matrix[i][comm.rank]) {
                    send.nbr.push_back( i );
                    send.ptr.push_back( send.ptr.back() + ns );
                }
            }

            // What columns do you need from me?
            for(size_t i = 0; i < send.nbr.size(); ++i)
                MPI_Irecv(&send_col[send.ptr[i]], comm_matrix[send.nbr[i]][comm.rank],
                        MPI_LONG, send.nbr[i], tag_exc_cols, comm, &send.req[i]);

            // Here is what I need from you:
            for(size_t i = 0; i < recv.nbr.size(); ++i)
                MPI_Isend(&recv_cols[recv.ptr[i]], comm_matrix[comm.rank][recv.nbr[i]],
                        MPI_LONG, recv.nbr[i], tag_exc_cols, comm, &recv.req[i]);

            /* While those messages are in flight, continue with the local work. */

            // Second pass over Astrip rows:
            // 1. Build local and remote matrix parts.
            // 2. Build local part of AZ matrix.
            aloc->nrows = nrows;
            aloc->ncols = nrows;
            aloc->ptr.reserve(nrows + 1);
            aloc->col.reserve(loc_nnz);
            aloc->val.reserve(loc_nnz);
            aloc->ptr.push_back(0);

            arem->nrows = nrows;
            arem->ncols = rc.size();
            arem->ptr.reserve(nrows + 1);
            arem->col.reserve(rem_nnz);
            arem->val.reserve(rem_nnz);
            arem->ptr.push_back(0);

            boost::partial_sum(az->ptr, az->ptr.begin());
            az->col.resize(az->ptr.back());
            az->val.resize(az->ptr.back());
            boost::fill(marker, -1);

            for(long i = 0; i < nrows; ++i) {
                long az_row_beg = az->ptr[i];
                long az_row_end = az_row_beg;

                for(row_iterator1 a = backend::row_begin(Astrip, i); a; ++a) {
                    long       c = a.col();
                    value_type v = a.value();

                    if ( domain[comm.rank] <= c && c < domain[comm.rank + 1] ) {
                        long loc_c = c - chunk_start;
                        aloc->col.push_back(loc_c);
                        aloc->val.push_back(v);

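                        // Accumulate this entry's contribution to row i of the
                        // local part of A*Z; marker[] records where column k
                        // was last written within this row.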
                        for(long j = 0, k = comm.rank * ndv; j < ndv; ++j, ++k) {
                            if (marker[k] < az_row_beg) {
                                marker[k] = az_row_end;
                                az->col[az_row_end] = k;
                                az->val[az_row_end] = v * def_vec(loc_c, j);
                                ++az_row_end;
                            } else {
                                az->val[marker[k]] += v * def_vec(loc_c, j);
                            }
                        }
                    } else {
                        arem->col.push_back(rc[c]);
                        arem->val.push_back(v);
                    }
                }

                az->ptr[i] = az_row_end;

                aloc->ptr.push_back(aloc->col.size());
                arem->ptr.push_back(arem->col.size());
            }


            /* Finish communication pattern setup. */
            MPI_Waitall(recv.req.size(), recv.req.data(), MPI_STATUSES_IGNORE);
            MPI_Waitall(send.req.size(), send.req.data(), MPI_STATUSES_IGNORE);

            // Shift columns to send to local numbering:
            BOOST_FOREACH(long &c, send_col) c -= chunk_start;


            /* Finish construction of AZ */
            boost::multi_array<value_type, 2> zsend(boost::extents[send.val.size()][ndv]);
            boost::multi_array<value_type, 2> zrecv(boost::extents[recv.val.size()][ndv]);

            // Exchange deflation vectors
            for(size_t i = 0; i < recv.nbr.size(); ++i)
                MPI_Irecv(
                        &zrecv[recv.ptr[i]][0], ndv * (recv.ptr[i+1] - recv.ptr[i]),
                        dtype, recv.nbr[i], tag_exc_vals, comm, &recv.req[i]);

            for(size_t i = 0; i < send_col.size(); ++i)
                for(long j = 0; j < ndv; ++j)
                    zsend[i][j] = def_vec(send_col[i], j);

            for(size_t i = 0; i < send.nbr.size(); ++i)
                MPI_Isend(
                        &zsend[send.ptr[i]][0], ndv * (send.ptr[i+1] - send.ptr[i]),
                        dtype, send.nbr[i], tag_exc_vals, comm, &send.req[i]);

            MPI_Waitall(recv.req.size(), recv.req.data(), MPI_STATUSES_IGNORE);

            boost::fill(marker, -1);

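            // Append the remote contributions to A*Z, continuing each row
            // after the local entries written above.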
            for(long i = 0; i < nrows; ++i) {
                long az_row_beg = az->ptr[i];
                long az_row_end = az_row_beg;

                for(row_iterator2 a = backend::row_begin(*arem, i); a; ++a) {
                    long       c = a.col();
                    value_type v = a.value();

                    // Domain the column belongs to
                    long d = recv.nbr[boost::upper_bound(recv.ptr, c) - recv.ptr.begin() - 1];

                    for(long j = 0, k = d * ndv; j < ndv; ++j, ++k) {
                        if (marker[k] < az_row_beg) {
                            marker[k] = az_row_end;
                            az->col[az_row_end] = k;
                            az->val[az_row_end] = v * zrecv[c][j];
                            ++az_row_end;
                        } else {
                            az->val[marker[k]] += v * zrecv[c][j];
                        }
                    }
                }

                az->ptr[i] = az_row_end;
            }

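            // az->ptr[i] currently holds the end of row i; rotate it into
            // standard CRS form, where ptr[i] marks the start of row i and
            // ptr[0] == 0.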
            std::rotate(az->ptr.begin(), az->ptr.end() - 1, az->ptr.end());
            az->ptr.front() = 0;

            MPI_Waitall(send.req.size(), send.req.data(), MPI_STATUSES_IGNORE);

            /* Build deflated matrix E. */
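            // E = Z^T (A Z); this process fills its own strip of ndv rows,
            // which is exchanged with the other processes below.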
            boost::multi_array<value_type, 2> erow(boost::extents[ndv][nz]);
            std::fill_n(erow.data(), erow.num_elements(), 0);

            for(long i = 0; i < nrows; ++i) {
                for(row_iterator2 a = backend::row_begin(*az, i); a; ++a) {
                    long       c = a.col();
                    value_type v = a.value();

                    for(long j = 0; j < ndv; ++j)
                        erow[j][c] += v * def_vec(i, j);
                }
            }

            // Count nonzeros in E.
            std::vector<int> Eptr(nz + 1, 0);
            for(int i = 0; i < comm.size; ++i)
                for(int j = 0; j < comm.size; ++j)
                    if (j == i || comm_matrix[i][j])
                        for(int k = 0; k < ndv; ++k)
                            Eptr[i * ndv + k + 1] += ndv;

            boost::partial_sum(Eptr, Eptr.begin());

            std::vector<int>        Ecol(Eptr.back());
            std::vector<value_type> Eval(Eptr.back());

            // Fill local strip of E.
            for(int i = 0; i < ndv; ++i) {
                int row_head = Eptr[comm.rank * ndv + i];
                for(int j = 0; j < comm.size; ++j) {
                    if (j == comm.rank || comm_matrix[comm.rank][j]) {
                        for(int k = 0; k < ndv; ++k) {
                            int c = j * ndv + k;
                            Ecol[row_head] = c;
                            Eval[row_head] = erow[i][c];
                            ++row_head;
                        }
                    }
                }
            }

            // Exchange strips of E.
            for(int p = 0; p < comm.size; ++p) {
                int ns = Eptr[(p + 1) * ndv] - Eptr[p * ndv];
                MPI_Bcast(&Ecol[Eptr[p * ndv]], ns, MPI_INT, p, comm);
                MPI_Bcast(&Eval[Eptr[p * ndv]], ns, dtype,   p, comm);
            }

            // Prepare E factorization.
            E = boost::make_shared<DirectSolver>(
                    boost::tie(nz, Eptr, Ecol, Eval), direct_solver_params
                    );

            // Create local AMG preconditioner.
            P = boost::make_shared<AMG>( *aloc, amg_params );

            // Create iterative solver instance.
            solve = boost::make_shared<Solver>(
                    nrows, solver_params, amg_params.backend,
                    detail::mpi_inner_product(mpi_comm)
                    );

            // Move matrices to backend.
            Arem = Backend::copy_matrix(arem, amg_params.backend);
            AZ   = Backend::copy_matrix(az,   amg_params.backend);

            // Column gatherer: retrieves the columns to be sent from the backend vectors.
            gather = boost::make_shared<typename Backend::gather>(
                    nrows, send_col, amg_params.backend);
        }
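
The constructor above touches the deflation vectors only through
def_vec.dim() and def_vec(i, j). A minimal functor satisfying that
interface, sketched here as an illustration, supplies one constant
deflation vector per subdomain; the struct name and the exact types are
assumptions rather than the library's own helper. With one constant vector
per subdomain the deflation space is the classical piecewise-constant
coarse space.

// Sketch of a deflation-vector functor compatible with the constructor
// above. Only dim() and operator()(i, j) are required; the name
// constant_deflation and the chosen types are illustrative assumptions.
struct constant_deflation {
    // One deflation vector per subdomain.
    unsigned dim() const { return 1; }

    // Value of deflation vector j at local row i: a constant vector is 1
    // everywhere, regardless of i and j.
    double operator()(long i, unsigned j) const {
        (void)i;
        (void)j;
        return 1.0;
    }
};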