예제 #1
0
/*
 * Out-of-place flavour of the distributed transpose.  The data starts
 * and ends in local_data; work serves as intermediate storage, so from
 * the caller's point of view the operation is "in place with scratch".
 *
 *   p          - transpose plan; supplies the local dimensions, the
 *                block sizes/offsets, the MPI datatype and communicator,
 *                and the description of the final permutation
 *   el_size    - multiplier applied to every block count and offset
 *                handed to MPI and to the permutation block size
 *                (presumably the number of TRANSPOSE_EL_TYPE values per
 *                logical element -- confirm against the caller)
 *   local_data - this process's data; it is the MPI receive buffer and
 *                is permuted afterwards, so it is overwritten
 *   work       - scratch buffer, used as the MPI send buffer
 *
 * The return codes of the MPI calls are not examined here.
 */
static void transpose_mpi_out_of_place(transpose_mpi_plan p, int el_size,
                                       TRANSPOSE_EL_TYPE *local_data,
                                       TRANSPOSE_EL_TYPE *work)
{
     /* Step 1: hand local_data and work to local_transpose_copy
        (presumably it leaves the locally transposed data in work). */
     local_transpose_copy(local_data, work, el_size, p->local_nx, p->ny);

     /* Step 2: exchange blocks between all processes, sending from work
        and receiving into local_data. */
     if (!p->all_blocks_equal) {
          const int num_pes = p->n_pes;
          int pe;

          /* The plan's per-process sizes and offsets are stored without
             the el_size factor.  Scale them in place for the call... */
          for (pe = 0; pe < num_pes; ++pe) {
               p->send_block_offsets[pe] *= el_size;
               p->send_block_sizes[pe] *= el_size;
               p->recv_block_offsets[pe] *= el_size;
               p->recv_block_sizes[pe] *= el_size;
          }

          MPI_Alltoallv(work,
                        p->send_block_sizes, p->send_block_offsets,
                        p->el_type,
                        local_data,
                        p->recv_block_sizes, p->recv_block_offsets,
                        p->el_type,
                        p->comm);

          /* ...and undo the scaling so the plan is left unchanged. */
          for (pe = 0; pe < num_pes; ++pe) {
               p->send_block_offsets[pe] /= el_size;
               p->send_block_sizes[pe] /= el_size;
               p->recv_block_offsets[pe] /= el_size;
               p->recv_block_sizes[pe] /= el_size;
          }
     }
     else {
          /* Uniform block size: a single count per direction suffices. */
          const int send_count = p->send_block_size * el_size;
          const int recv_count = p->recv_block_size * el_size;

          MPI_Alltoall(work, send_count, p->el_type,
                       local_data, recv_count, p->el_type,
                       p->comm);
     }

     /* Step 3: apply the plan's block permutation to the received data. */
     do_permutation(local_data, p->perm_block_dest, p->num_perm_blocks,
                    p->perm_block_size * el_size);
}
예제 #2
0
/*
 * Run one of the two local stages of the in-place transpose on
 * local_data, selected by `which`:
 *
 *   BEFORE_TRANSPOSE - call the TOMS 2d transpose routine with
 *                      p->local_nx, p->ny and the plan's move buffer
 *                      (p->move, p->move_size).  The plain variant is
 *                      used when el_size is 1; otherwise the
 *                      "arbitrary" variant is called and additionally
 *                      receives el_size.
 *   AFTER_TRANSPOSE  - apply the plan's block permutation
 *                      (p->perm_block_dest, p->num_perm_blocks) with a
 *                      block size of p->perm_block_size * el_size.
 *
 * Any other value of `which` does nothing.
 */
void transpose_in_place_local(transpose_mpi_plan p,
                              int el_size, TRANSPOSE_EL_TYPE *local_data,
                              transpose_in_place_which which)
{
     if (which == BEFORE_TRANSPOSE) {
          if (el_size != 1)
               TOMS_transpose_2d_arbitrary(local_data,
                                           p->local_nx, p->ny,
                                           el_size,
                                           p->move, p->move_size);
          else
               TOMS_transpose_2d(local_data,
                                 p->local_nx, p->ny,
                                 p->move, p->move_size);
     }
     else if (which == AFTER_TRANSPOSE) {
          do_permutation(local_data, p->perm_block_dest,
                         p->num_perm_blocks,
                         p->perm_block_size * el_size);
     }
}