/*
 * Out-of-place flavour of transpose_mpi.  Seen from the caller it still
 * behaves "in place" (the result is left in local_data); `work` merely
 * serves as a scratch array.
 *
 * Sequence of operations:
 *   1. local_transpose_copy() is invoked with local_data and work
 *      (presumably filling work, since work is what gets sent next).
 *   2. The blocks are exchanged between the processes of p->comm, sending
 *      from work and receiving into local_data.  MPI_Alltoall is used when
 *      p->all_blocks_equal is set, MPI_Alltoallv otherwise.
 *   3. do_permutation() is applied to local_data.
 *
 * Every count/offset handed to MPI is multiplied by el_size.  For the
 * Alltoallv path this is done by scaling the plan's arrays in place just
 * before the call and dividing them back right afterwards, so the plan is
 * left unchanged on return.
 */
static void transpose_mpi_out_of_place(transpose_mpi_plan p, int el_size,
                                       TRANSPOSE_EL_TYPE *local_data,
                                       TRANSPOSE_EL_TYPE *work)
{
     local_transpose_copy(local_data, work, el_size, p->local_nx, p->ny);

     if (!p->all_blocks_equal) {
          int n_pes = p->n_pes;
          int pe;

          /* Temporarily scale the per-process sizes and offsets. */
          pe = 0;
          while (pe < n_pes) {
               p->send_block_sizes[pe] *= el_size;
               p->recv_block_sizes[pe] *= el_size;
               p->send_block_offsets[pe] *= el_size;
               p->recv_block_offsets[pe] *= el_size;
               ++pe;
          }

          MPI_Alltoallv(work,
                        p->send_block_sizes, p->send_block_offsets,
                        p->el_type,
                        local_data,
                        p->recv_block_sizes, p->recv_block_offsets,
                        p->el_type,
                        p->comm);

          /* Undo the scaling so the plan holds its original values again. */
          pe = 0;
          while (pe < n_pes) {
               p->send_block_sizes[pe] /= el_size;
               p->recv_block_sizes[pe] /= el_size;
               p->send_block_offsets[pe] /= el_size;
               p->recv_block_offsets[pe] /= el_size;
               ++pe;
          }
     } else {
          /* Uniform block size: a plain all-to-all suffices. */
          MPI_Alltoall(work,
                       p->send_block_size * el_size, p->el_type,
                       local_data,
                       p->recv_block_size * el_size, p->el_type,
                       p->comm);
     }

     do_permutation(local_data, p->perm_block_dest, p->num_perm_blocks,
                    p->perm_block_size * el_size);
}
/*
 * Perform one of the purely local steps of the transpose on local_data,
 * selected by `which`:
 *
 *   BEFORE_TRANSPOSE - call TOMS_transpose_2d (when el_size == 1) or
 *                      TOMS_transpose_2d_arbitrary (any other el_size) on
 *                      the p->local_nx by p->ny array, passing the plan's
 *                      p->move / p->move_size along.
 *   AFTER_TRANSPOSE  - call do_permutation() with the plan's permutation
 *                      table and a block size of p->perm_block_size * el_size.
 *
 * Any other value of `which` is ignored.
 */
void transpose_in_place_local(transpose_mpi_plan p, int el_size,
                              TRANSPOSE_EL_TYPE *local_data,
                              transpose_in_place_which which)
{
     if (which == BEFORE_TRANSPOSE) {
          if (el_size != 1) {
               TOMS_transpose_2d_arbitrary(local_data,
                                           p->local_nx, p->ny, el_size,
                                           p->move, p->move_size);
          } else {
               TOMS_transpose_2d(local_data,
                                 p->local_nx, p->ny,
                                 p->move, p->move_size);
          }
     } else if (which == AFTER_TRANSPOSE) {
          do_permutation(local_data, p->perm_block_dest, p->num_perm_blocks,
                         p->perm_block_size * el_size);
     }
}