Code Example #1
File: comm_kokkos.cpp  Project: danicholson/lammps
void CommKokkos::reverse_comm_device()
{
  int n;
  MPI_Request request;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
  double *buf;

  // exchange data with another proc
  // if other proc is self, just copy
  // if comm_f_only set, exchange or copy directly from f, don't pack

  k_sendlist.sync<DeviceType>();
  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,F_MASK);

  for (int iswap = nswap-1; iswap >= 0; iswap--) {
    if (sendproc[iswap] != me) {
      if (comm_f_only) {
        if (size_reverse_recv[iswap])
            MPI_Irecv(k_buf_recv.view<DeviceType>().data(),size_reverse_recv[iswap],MPI_DOUBLE,
                    sendproc[iswap],0,world,&request);
        if (size_reverse_send[iswap]) {
          buf = atomKK->k_f.view<DeviceType>().data() +
            firstrecv[iswap]*atomKK->k_f.view<DeviceType>().extent(1);

          MPI_Send(buf,size_reverse_send[iswap],MPI_DOUBLE,
                   recvproc[iswap],0,world);
        }
        if (size_reverse_recv[iswap]) {
          MPI_Wait(&request,MPI_STATUS_IGNORE);
          atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                           space,F_MASK);
        }
      } else {
        if (size_reverse_recv[iswap])
          MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                    size_reverse_recv[iswap],MPI_DOUBLE,
                    sendproc[iswap],0,world,&request);
        n = avec->pack_reverse_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
        DeviceType::fence();
        if (n)
          MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                   MPI_DOUBLE,recvproc[iswap],0,world);
        if (size_reverse_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
      }
      avec->unpack_reverse_kokkos(sendnum[iswap],k_sendlist,iswap,
                                k_buf_recv);
      DeviceType::fence();
    } else {
      if (sendnum[iswap])
        n = avec->unpack_reverse_self(sendnum[iswap],k_sendlist,iswap,
                                 firstrecv[iswap]);
    }
  }
}
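
The function above repeats one communication pattern per swap: post a non-blocking receive for the incoming buffer, pack and send the outgoing buffer, then wait on the receive before unpacking. Below is a minimal sketch of that pattern with the Kokkos packing stripped out; the function and parameter names are hypothetical and are not part of LAMMPS.

#include <mpi.h>
#include <vector>

// Post the receive before the blocking send so two processes exchanging
// buffers with each other cannot deadlock, then wait before the caller unpacks.
void swap_exchange(std::vector<double> &send_buf, std::vector<double> &recv_buf,
                   int dest, int src, MPI_Comm comm)
{
  MPI_Request request;
  if (!recv_buf.empty())
    MPI_Irecv(recv_buf.data(), (int) recv_buf.size(), MPI_DOUBLE,
              src, 0, comm, &request);
  if (!send_buf.empty())
    MPI_Send(send_buf.data(), (int) send_buf.size(), MPI_DOUBLE,
             dest, 0, comm);
  if (!recv_buf.empty())
    MPI_Wait(&request, MPI_STATUS_IGNORE);
}

Note that in reverse_comm_device() the direction is reversed relative to forward communication: the receive is posted from sendproc[iswap] and the send goes to recvproc[iswap].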
Code Example #2
File: comm_kokkos.cpp  Project: ganzenmg/lammps
void CommKokkos::borders_device() {
  int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
  int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
  double lo,hi;
  int *type;
  double **x;
  double *buf,*mlo,*mhi;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;

  ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
  k_sendlist.modify<DeviceType>();
  atomKK->sync(exec_space,ALL_MASK);

  // do swaps over all 3 dimensions

  iswap = 0;
  smax = rmax = 0;

  for (dim = 0; dim < 3; dim++) {
    nlast = 0;
    twoneed = 2*maxneed[dim];
    for (ineed = 0; ineed < twoneed; ineed++) {

      // find atoms within slab boundaries lo/hi using <= and >=
      // check atoms between nfirst and nlast
      //   for first swaps in a dim, check owned and ghost
      //   for later swaps in a dim, only check newly arrived ghosts
      // store sent atom indices in list for use in future timesteps

      x = atom->x;
      if (style == SINGLE) {
        lo = slablo[iswap];
        hi = slabhi[iswap];
      } else {
        type = atom->type;
        mlo = multilo[iswap];
        mhi = multihi[iswap];
      }
      if (ineed % 2 == 0) {
        nfirst = nlast;
        nlast = atom->nlocal + atom->nghost;
      }

      nsend = 0;

      // sendflag = 0 if I do not send on this swap
      // sendneed test indicates receiver no longer requires data
      // e.g. due to non-PBC or non-uniform sub-domains

      if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
      else sendflag = 1;

      // find send atoms according to SINGLE vs MULTI
      // all atoms eligible versus atoms in bordergroup
      // only need to limit loop to bordergroup for first sends (ineed < 2)
      // on these sends, break loop in two: owned (in group) and ghost

      if (sendflag) {
        if (!bordergroup || ineed >= 2) {
          if (style == SINGLE) {
            typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
            total_send.h_view(0) = 0;
            if(exec_space == Device) {
              total_send.template modify<DeviceType>();
              total_send.template sync<LMPDeviceType>();
            }
            BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
            Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
            Kokkos::parallel_for(config,f);
            DeviceType::fence();
            total_send.template modify<DeviceType>();
            total_send.template sync<LMPHostType>();

            if(total_send.h_view(0) >= maxsendlist[iswap]) {
              grow_list(iswap,total_send.h_view(0));
              total_send.h_view(0) = 0;
              if(exec_space == Device) {
                total_send.template modify<LMPHostType>();
                total_send.template sync<LMPDeviceType>();
              }
              BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                  total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
              Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
              Kokkos::parallel_for(config,f);
              DeviceType::fence();
              total_send.template modify<DeviceType>();
              total_send.template sync<LMPHostType>();
            }
            nsend = total_send.h_view(0);
          } else {
            error->all(FLERR,"Required border comm not yet "
                       "implemented with Kokkos\n");
            for (i = nfirst; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }

        } else {
          error->all(FLERR,"Required border comm not yet "
                     "implemented with Kokkos\n");
          if (style == SINGLE) {
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            for (i = atom->nlocal; i < nlast; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
          } else {
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
            for (i = atom->nlocal; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }
        }
      }

      // pack up list of border atoms

      if (nsend*size_border > maxsend)
        grow_send_kokkos(nsend*size_border,0);
      if (ghost_velocity) {
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
                                  pbc_flag[iswap],pbc[iswap]);
      }
      else
        n = avec->
          pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
                             pbc_flag[iswap],pbc[iswap],exec_space);

      // swap atoms with other proc
      // no MPI calls except SendRecv if nsend/nrecv = 0
      // put incoming ghosts at end of my atom arrays
      // if swapping with self, simply copy, no messages

      if (sendproc[iswap] != me) {
        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
        if (nrecv*size_border > maxrecv) grow_recv_kokkos(nrecv*size_border);
        if (nrecv) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
                             nrecv*size_border,MPI_DOUBLE,
                             recvproc[iswap],0,world,&request);
        if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
                        MPI_DOUBLE,sendproc[iswap],0,world);
        if (nrecv) MPI_Wait(&request,&status);
        buf = buf_recv;
      } else {
        nrecv = nsend;
        buf = buf_send;
      }

      // unpack buffer

      if (ghost_velocity) {
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
      }
      else
        if (sendproc[iswap] != me)
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_recv,exec_space);
        else
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_send,exec_space);

      // set all pointers & counters

      smax = MAX(smax,nsend);
      rmax = MAX(rmax,nrecv);
      sendnum[iswap] = nsend;
      recvnum[iswap] = nrecv;
      size_forward_recv[iswap] = nrecv*size_forward;
      size_reverse_send[iswap] = nrecv*size_reverse;
      size_reverse_recv[iswap] = nsend*size_reverse;
      firstrecv[iswap] = atom->nlocal + atom->nghost;
      atom->nghost += nrecv;
      iswap++;
    }
  }

  // insure send/recv buffers are long enough for all forward & reverse comm

  int max = MAX(maxforward*smax,maxreverse*rmax);
  if (max > maxsend) grow_send_kokkos(max,0);
  max = MAX(maxforward*rmax,maxreverse*smax);
  if (max > maxrecv) grow_recv_kokkos(max);

  // reset global->local map

  if (map_style) atom->map_set();
  if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
  atomKK->modified(exec_space,ALL_MASK);
  DeviceType::fence();
}
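
The device functor BuildBorderListFunctor used above performs, in parallel, the same slab test as the serial fallback loops: collect the indices of atoms whose coordinate along dim lies within [lo, hi], inclusive on both ends. A minimal serial sketch of that test follows (hypothetical names, SINGLE style only, no grow_list handling):

#include <array>
#include <vector>

// Return indices i in [nfirst, nlast) with lo <= x[i][dim] <= hi,
// i.e. the atoms inside the slab that must be sent as border atoms.
std::vector<int> build_border_list(const std::vector<std::array<double,3>> &x,
                                   int nfirst, int nlast, int dim,
                                   double lo, double hi)
{
  std::vector<int> sendlist;
  for (int i = nfirst; i < nlast; i++)
    if (x[i][dim] >= lo && x[i][dim] <= hi) sendlist.push_back(i);
  return sendlist;
}

In the device version the list is filled with an atomically incremented counter (total_send), and the whole pass is simply rerun after grow_list() if the counter overflows maxsendlist[iswap], which is why the functor construction appears twice above.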
Code Example #3
File: comm_kokkos.cpp  Project: ganzenmg/lammps
void CommKokkos::exchange_device()
{
  int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
  double lo,hi,value;
  double **x;
  double *sublo,*subhi,*buf;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;

  // clear global->local map for owned and ghost atoms
  // b/c atoms migrate to new procs in exchange() and
  //   new ghosts are created in borders()
  // map_set() is done at end of borders()
  // clear ghost count and any ghost bonus data internal to AtomVec

  if (map_style) atom->map_clear();
  atom->nghost = 0;
  atom->avec->clear_bonus();

  // subbox bounds for orthogonal or triclinic

  if (triclinic == 0) {
    sublo = domain->sublo;
    subhi = domain->subhi;
  } else {
    sublo = domain->sublo_lamda;
    subhi = domain->subhi_lamda;
  }

  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);

  // loop over dimensions
  for (int dim = 0; dim < 3; dim++) {

    // fill buffer with atoms leaving my box, using < and >=
    // when atom is deleted, fill it in with last atom

    x = atom->x;
    lo = sublo[dim];
    hi = subhi[dim];
    nlocal = atom->nlocal;
    i = nsend = 0;

    if (true) {
      if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
      k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
      while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
        k_count.h_view(0) = 0;
        k_count.modify<LMPHostType>();
        k_count.sync<DeviceType>();

        BuildExchangeListFunctor<DeviceType> 
          f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
            nlocal,dim,lo,hi);
        Kokkos::parallel_for(nlocal,f);
        DeviceType::fence();
        k_exchange_sendlist.modify<DeviceType>();
        k_sendflag.modify<DeviceType>();
        k_count.modify<DeviceType>();

        k_count.sync<LMPHostType>();
        if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
          k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
          k_exchange_copylist.resize(k_count.h_view(0)*1.1);
          k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
        }
      }
      k_exchange_sendlist.sync<LMPHostType>();
      k_sendflag.sync<LMPHostType>();

      int sendpos = nlocal-1;
      nlocal -= k_count.h_view(0);
      for(int i = 0; i < k_count.h_view(0); i++) {
        if (k_exchange_sendlist.h_view(i)<nlocal) {
          while (k_sendflag.h_view(sendpos)) sendpos--;
          k_exchange_copylist.h_view(i) = sendpos;
          sendpos--;
        } else
        k_exchange_copylist.h_view(i) = -1;
      }

      k_exchange_copylist.modify<LMPHostType>();
      k_exchange_copylist.sync<DeviceType>();
      nsend = 
        avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
                                   k_exchange_sendlist,k_exchange_copylist,
                                   ExecutionSpaceFromDevice<DeviceType>::
                                   space,dim,lo,hi);
      DeviceType::fence();

    } else {
      while (i < nlocal) {
        if (x[i][dim] < lo || x[i][dim] >= hi) {
          if (nsend > maxsend) grow_send_kokkos(nsend,1);
          nsend += avec->pack_exchange(i,&buf_send[nsend]);
          avec->copy(nlocal-1,i,1);
          nlocal--;
        } else i++;
      }
    }
    atom->nlocal = nlocal;

    // send/recv atoms in both directions
    // if 1 proc in dimension, no send/recv, set recv buf to send buf
    // if 2 procs in dimension, single send/recv
    // if more than 2 procs in dimension, send/recv to both neighbors

    if (procgrid[dim] == 1) {
      nrecv = nsend;
      buf = buf_send;
      if (nrecv) {
        atom->nlocal=avec->
          unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
                                 ExecutionSpaceFromDevice<DeviceType>::space);
        DeviceType::fence();
      }
    } else {
      MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
                   &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
      nrecv = nrecv1;
      if (procgrid[dim] > 2) {
        MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
                     &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
        nrecv += nrecv2;
      }
      if (nrecv > maxrecv) grow_recv_kokkos(nrecv);

      MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),nrecv1,
                MPI_DOUBLE,procneigh[dim][1],0,
                world,&request);
      MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
               MPI_DOUBLE,procneigh[dim][0],0,world);
      MPI_Wait(&request,&status);

      if (procgrid[dim] > 2) {
        MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device()+nrecv1,
                  nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
                  world,&request);
        MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
                 MPI_DOUBLE,procneigh[dim][1],0,world);
        MPI_Wait(&request,&status);
      }

      buf = buf_recv;
      if (nrecv) {
        atom->nlocal = avec->
          unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
                                 ExecutionSpaceFromDevice<DeviceType>::space);
        DeviceType::fence();
      }
    }

    // check incoming atoms to see if they are in my box
    // if so, add to my list

  }

  atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);

  if (atom->firstgroupname) {
    /* this is not yet implemented with Kokkos */
    atomKK->sync(Host,ALL_MASK);
    atom->first_reorder();
    atomKK->modified(Host,ALL_MASK);
  }
}
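
The host loop over k_exchange_sendlist above builds a copy list so that the local atom array stays contiguous after the leaving atoms are removed: every vacated slot below the new nlocal is back-filled with a surviving atom taken from the tail of the array. A minimal host-only sketch of that logic, with hypothetical names:

#include <vector>

// sendlist: indices of atoms leaving the sub-domain; sendflag[i] != 0 if atom i leaves.
// Returns, for each leaving atom, the index of the tail atom copied into its slot,
// or -1 if the slot itself lies in the discarded tail.
std::vector<int> build_copylist(const std::vector<int> &sendlist,
                                const std::vector<char> &sendflag, int nlocal_old)
{
  const int nlocal_new = nlocal_old - (int) sendlist.size();
  std::vector<int> copylist(sendlist.size(), -1);
  int sendpos = nlocal_old - 1;
  for (int i = 0; i < (int) sendlist.size(); i++) {
    if (sendlist[i] < nlocal_new) {          // slot survives, fill it from the tail
      while (sendflag[sendpos]) sendpos--;   // skip tail atoms that are themselves leaving
      copylist[i] = sendpos--;
    }
  }
  return copylist;
}

The copy list is then synced to the device so pack_exchange_kokkos() can pack the leavers and perform the back-fill copies in a single device pass.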
Code Example #4
File: comm_kokkos.cpp  Project: ganzenmg/lammps
void CommKokkos::forward_comm_device(int dummy)
{
  int n;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
  double **x = atom->x;
  double *buf;

  // exchange data with another proc
  // if other proc is self, just copy
  // if comm_x_only set, exchange or copy directly to x, don't unpack

  k_sendlist.sync<DeviceType>();

  for (int iswap = 0; iswap < nswap; iswap++) {

    if (sendproc[iswap] != me) {
      if (comm_x_only) {
        atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
        if (size_forward_recv[iswap]) buf = x[firstrecv[iswap]];
        else buf = NULL;

        if (size_forward_recv[iswap]) {
            buf = atomKK->k_x.view<DeviceType>().ptr_on_device() + 
              firstrecv[iswap]*atomKK->k_x.view<DeviceType>().dimension_1();
            MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        }
        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                   iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);

        if (n) {
          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),
                   n,MPI_DOUBLE,sendproc[iswap],0,world);
        }

        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
        atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                         space,X_MASK);
      } else if (ghost_velocity) {
        error->all(FLERR,"Ghost velocity forward comm not yet "
                   "implemented with Kokkos");
        if (size_forward_recv[iswap])
          MPI_Irecv(k_buf_recv.view<LMPHostType>().ptr_on_device(),
                    size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
                                buf_send,pbc_flag[iswap],pbc[iswap]);
        if (n) MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap],0,world);
        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_recv);
      } else {
        if (size_forward_recv[iswap])
          MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
                    size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
                                   k_buf_send,pbc_flag[iswap],pbc[iswap]);
        if (n)
          MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
                   MPI_DOUBLE,sendproc[iswap],0,world);
        if (size_forward_recv[iswap]) MPI_Wait(&request,&status);
        avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
      }

    } else {
      if (!ghost_velocity) {
        if (sendnum[iswap])
          n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
                                   firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
      } else if (ghost_velocity) {
        error->all(FLERR,"Ghost velocity forward comm not yet "
                   "implemented with Kokkos");
        n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
                                buf_send,pbc_flag[iswap],pbc[iswap]);
        avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
      }
    }
  }
}
Code Example #5
File: comm_kokkos.cpp  Project: danicholson/lammps
void CommKokkos::forward_comm_device(int dummy)
{
  int n;
  MPI_Request request;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
  double *buf;

  // exchange data with another proc
  // if other proc is self, just copy
  // if comm_x_only set, exchange or copy directly to x, don't unpack

  k_sendlist.sync<DeviceType>();
  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);

  for (int iswap = 0; iswap < nswap; iswap++) {
    if (sendproc[iswap] != me) {
      if (comm_x_only) {
        if (size_forward_recv[iswap]) {
          buf = atomKK->k_x.view<DeviceType>().data() +
            firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
          MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        }
        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
                                   iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
        DeviceType::fence();
        if (n) {
          MPI_Send(k_buf_send.view<DeviceType>().data(),
                   n,MPI_DOUBLE,sendproc[iswap],0,world);
        }

        if (size_forward_recv[iswap]) {
          MPI_Wait(&request,MPI_STATUS_IGNORE);
          atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
                           space,X_MASK);
        }
      } else if (ghost_velocity) {
        if (size_forward_recv[iswap]) {
          MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                    size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        }
        n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                       k_buf_send,pbc_flag[iswap],pbc[iswap]);
        DeviceType::fence();
        if (n) {
          MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                   MPI_DOUBLE,sendproc[iswap],0,world);
        }
        if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
        avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
        DeviceType::fence();
      } else {
        if (size_forward_recv[iswap])
          MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
                    size_forward_recv[iswap],MPI_DOUBLE,
                    recvproc[iswap],0,world,&request);
        n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
                                   k_buf_send,pbc_flag[iswap],pbc[iswap]);
        DeviceType::fence();
        if (n)
          MPI_Send(k_buf_send.view<DeviceType>().data(),n,
                   MPI_DOUBLE,sendproc[iswap],0,world);
        if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
        avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
        DeviceType::fence();
      }
    } else {
      if (!ghost_velocity) {
        if (sendnum[iswap])
          n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
                                   firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
        DeviceType::fence();
      } else {
        n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
                                       k_buf_send,pbc_flag[iswap],pbc[iswap]);
        DeviceType::fence();
        avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
        DeviceType::fence();
      }
    }
  }
}
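
Compared with the older version in Code Example #4, this revision inserts DeviceType::fence() between every pack kernel and the MPI call that follows it. The pack kernels are launched asynchronously, so the host must wait for them to complete before handing the buffer pointer to MPI. A minimal sketch of that ordering is below; the kernel is a hypothetical stand-in, and a CUDA-aware (or otherwise device-aware) MPI is assumed if the view lives in device memory.

#include <Kokkos_Core.hpp>
#include <mpi.h>

// Fill a buffer in a device kernel, then send it; the fence guarantees the
// kernel has finished before MPI reads the buffer from the host thread.
void pack_and_send(Kokkos::View<double*> buf, int n, int dest, MPI_Comm comm)
{
  Kokkos::parallel_for("pack", n, KOKKOS_LAMBDA(const int i) {
    buf(i) = static_cast<double>(i);   // stand-in for the real pack kernel
  });
  Kokkos::fence();                     // complete the kernel before MPI_Send touches buf
  MPI_Send(buf.data(), n, MPI_DOUBLE, dest, 0, comm);
}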