void CommKokkos::forward_comm_device(int dummy) { int n; MPI_Request request; MPI_Status status; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; double **x = atom->x; double *buf; // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack k_sendlist.sync<DeviceType>(); for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK); if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]]; else buf = NULL; if (size_forward_recv[iswap]) { buf = atomKK->k_x.view<DeviceType>().ptr_on_device() + firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1(); MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); if (n) { MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(), n,MPI_DOUBLE,sendproc[iswap],0,world); } if (size_forward_recv[iswap]) MPI_Wait(&request,&status); atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: space,X_MASK); } else if (ghost_velocity) { error->all(FLERR,"Ghost velocity forward comm not yet " "implemented with Kokkos"); if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view<LMPHostType>().ptr_on_device(), size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,&status); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(), size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n, MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,&status); avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); } } else { if (!ghost_velocity) { if (sendnum[iswap]) n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); } else if (ghost_velocity) { error->all(FLERR,"Ghost velocity forward comm not yet " "implemented with Kokkos"); n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); } } } }
void CommKokkos::forward_comm_device(int dummy) { int n; MPI_Request request; AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; double *buf; // exchange data with another proc // if other proc is self, just copy // if comm_x_only set, exchange or copy directly to x, don't unpack k_sendlist.sync<DeviceType>(); atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK); for (int iswap = 0; iswap < nswap; iswap++) { if (sendproc[iswap] != me) { if (comm_x_only) { if (size_forward_recv[iswap]) { buf = atomKK->k_x.view<DeviceType>().data() + firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1); MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); DeviceType::fence(); if (n) { MPI_Send(k_buf_send.view<DeviceType>().data(), n,MPI_DOUBLE,sendproc[iswap],0,world); } if (size_forward_recv[iswap]) { MPI_Wait(&request,MPI_STATUS_IGNORE); atomKK->modified(ExecutionSpaceFromDevice<DeviceType>:: space,X_MASK); } } else if (ghost_velocity) { if (size_forward_recv[iswap]) { MPI_Irecv(k_buf_recv.view<DeviceType>().data(), size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); } n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); DeviceType::fence(); if (n) { MPI_Send(k_buf_send.view<DeviceType>().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); } if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); DeviceType::fence(); } else { if (size_forward_recv[iswap]) MPI_Irecv(k_buf_recv.view<DeviceType>().data(), size_forward_recv[iswap],MPI_DOUBLE, recvproc[iswap],0,world,&request); n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); DeviceType::fence(); if (n) MPI_Send(k_buf_send.view<DeviceType>().data(),n, MPI_DOUBLE,sendproc[iswap],0,world); if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); DeviceType::fence(); } } else { if (!ghost_velocity) { if (sendnum[iswap]) n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); DeviceType::fence(); } else { n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, k_buf_send,pbc_flag[iswap],pbc[iswap]); DeviceType::fence(); avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); DeviceType::fence(); } } } }