/* ----------------------------------------------------------------------
   Kokkos device path for border communication: identify owned/ghost atoms
   inside each swap's slab boundaries, exchange them with neighbor procs
   (or self-copy), and append the incoming atoms as ghosts.
   Side effects: grows send/recv buffers and the sendlist as needed,
   updates per-swap counters (sendnum, recvnum, firstrecv, size_*_recv/send),
   increments atom->nghost, and rebuilds the global->local map if enabled.
   NOTE(review): MULTI style, bordergroup, and ghost_velocity paths are not
   yet ported to Kokkos and abort via error->all(); the legacy host loops
   after those calls are effectively unreachable.
------------------------------------------------------------------------- */
void CommKokkos::borders_device() {
  int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
  int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
  double lo,hi;
  int *type;
  double **x;
  double *buf,*mlo,*mhi;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;

  ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
  // mark the device copy of the sendlist as most recent, and make sure all
  // atom arrays are up to date in the execution space before reading them
  k_sendlist.modify<DeviceType>();
  atomKK->sync(exec_space,ALL_MASK);

  // do swaps over all 3 dimensions

  iswap = 0;
  smax = rmax = 0;

  for (dim = 0; dim < 3; dim++) {
    nlast = 0;
    // maxneed[dim] swaps in each direction (lo and hi) along this dimension
    twoneed = 2*maxneed[dim];
    for (ineed = 0; ineed < twoneed; ineed++) {

      // find atoms within slab boundaries lo/hi using <= and >=
      // check atoms between nfirst and nlast
      //   for first swaps in a dim, check owned and ghost
      //   for later swaps in a dim, only check newly arrived ghosts
      // store sent atom indices in list for use in future timesteps

      x = atom->x;
      if (style == SINGLE) {
        lo = slablo[iswap];
        hi = slabhi[iswap];
      } else {
        // MULTI style: per-type slab boundaries
        type = atom->type;
        mlo = multilo[iswap];
        mhi = multihi[iswap];
      }
      // even ineed = first swap in a direction for this dim:
      // extend the scan window to include ghosts received so far
      if (ineed % 2 == 0) {
        nfirst = nlast;
        nlast = atom->nlocal + atom->nghost;
      }

      nsend = 0;

      // sendflag = 0 if I do not send on this swap
      // sendneed test indicates receiver no longer requires data
      // e.g. due to non-PBC or non-uniform sub-domains

      if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
      else sendflag = 1;

      // find send atoms according to SINGLE vs MULTI
      // all atoms eligible versus atoms in bordergroup
      // only need to limit loop to bordergroup for first sends (ineed < 2)
      // on these sends, break loop in two: owned (in group) and ghost

      if (sendflag) {
        if (!bordergroup || ineed >= 2) {
          if (style == SINGLE) {
            // build the send list on the device; total_send is a 1-element
            // counter (dual view) incremented by the functor
            typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
            total_send.h_view(0) = 0;
            if(exec_space == Device) {
              total_send.template modify<DeviceType>();
              total_send.template sync<LMPDeviceType>();
            }
            BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
            // NOTE(review): team size 128 appears to be a hard-coded launch
            // config — presumably tuned for GPUs; confirm before changing
            Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
            Kokkos::parallel_for(config,f);
            DeviceType::fence();
            total_send.template modify<DeviceType>();
            total_send.template sync<LMPHostType>();

            // if the counter overflowed the send list capacity, grow the
            // list and rebuild it from scratch with a second functor pass
            if(total_send.h_view(0) >= maxsendlist[iswap]) {
              grow_list(iswap,total_send.h_view(0));
              total_send.h_view(0) = 0;
              if(exec_space == Device) {
                total_send.template modify<LMPHostType>();
                total_send.template sync<LMPDeviceType>();
              }
              BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                  total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
              Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
              Kokkos::parallel_for(config,f);
              DeviceType::fence();
              total_send.template modify<DeviceType>();
              total_send.template sync<LMPHostType>();
            }
            nsend = total_send.h_view(0);
          } else {
            // MULTI style not ported to Kokkos: abort; host loop below is
            // the legacy CPU implementation, kept for reference
            error->all(FLERR,"Required border comm not yet "
                       "implemented with Kokkos\n");
            for (i = nfirst; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }
        } else {
          // bordergroup path (first sends restricted to the border group)
          // not ported to Kokkos: abort; legacy host loops kept below
          error->all(FLERR,"Required border comm not yet "
                     "implemented with Kokkos\n");
          if (style == SINGLE) {
            // scan owned atoms in group, then already-received ghosts
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            for (i = atom->nlocal; i < nlast; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
          } else {
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
            for (i = atom->nlocal; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }
        }
      }

      // pack up list of border atoms

      if (nsend*size_border > maxsend)
        grow_send_kokkos(nsend*size_border,0);
      if (ghost_velocity) {
        // ghost velocities not ported to Kokkos: abort before host pack
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
                                  pbc_flag[iswap],pbc[iswap]);
      }
      else
        n = avec->pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
                                     pbc_flag[iswap],pbc[iswap],exec_space);

      // swap atoms with other proc
      // no MPI calls except SendRecv if nsend/nrecv = 0
      // put incoming ghosts at end of my atom arrays
      // if swapping with self, simply copy, no messages

      if (sendproc[iswap] != me) {
        // exchange counts first so the recv buffer can be sized, then do the
        // actual payload transfer directly from/to the device-space views
        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
        if (nrecv*size_border > maxrecv)
          grow_recv_kokkos(nrecv*size_border);
        if (nrecv)
          MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
                    nrecv*size_border,MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        if (n)
          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
                   MPI_DOUBLE,sendproc[iswap],0,world);
        if (nrecv) MPI_Wait(&request,&status);
        // NOTE(review): buf is only consumed on the ghost_velocity path,
        // which aborts above; the Kokkos unpack below uses k_buf_* directly
        buf = buf_recv;
      } else {
        // swapping with self: unpack straight from the send buffer
        nrecv = nsend;
        buf = buf_send;
      }

      // unpack buffer

      if (ghost_velocity) {
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
      }
      else
        if (sendproc[iswap] != me)
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_recv,exec_space);
        else
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_send,exec_space);

      // set all pointers & counters

      smax = MAX(smax,nsend);
      rmax = MAX(rmax,nrecv);
      sendnum[iswap] = nsend;
      recvnum[iswap] = nrecv;
      size_forward_recv[iswap] = nrecv*size_forward;
      size_reverse_send[iswap] = nrecv*size_reverse;
      size_reverse_recv[iswap] = nsend*size_reverse;
      firstrecv[iswap] = atom->nlocal + atom->nghost;
      atom->nghost += nrecv;
      iswap++;
    }
  }

  // insure send/recv buffers are long enough for all forward & reverse comm

  int max = MAX(maxforward*smax,maxreverse*rmax);
  if (max > maxsend) grow_send_kokkos(max,0);
  max = MAX(maxforward*rmax,maxreverse*smax);
  if (max > maxrecv) grow_recv_kokkos(max);

  // reset global->local map

  if (map_style) atom->map_set();
  // if we ran on the host, push the sendlist back to the device so future
  // device-side forward/reverse comm sees the updated list
  if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();

  // mark all atom data as modified in this execution space
  atomKK->modified(exec_space,ALL_MASK);
  DeviceType::fence();
}