Code Example #1
File: comm_kokkos.cpp  Project: ganzenmg/lammps
void CommKokkos::borders_device() {
  int i,n,itype,iswap,dim,ineed,twoneed,smax,rmax;
  int nsend,nrecv,sendflag,nfirst,nlast,ngroup;
  double lo,hi;
  int *type;
  double **x;
  double *buf,*mlo,*mhi;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;

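  // sync all atom data to the execution space (host or device)
  // where the border atom lists will be built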
  ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
  k_sendlist.modify<DeviceType>();
  atomKK->sync(exec_space,ALL_MASK);

  // do swaps over all 3 dimensions

  iswap = 0;
  smax = rmax = 0;

  for (dim = 0; dim < 3; dim++) {
    nlast = 0;
    twoneed = 2*maxneed[dim];
    for (ineed = 0; ineed < twoneed; ineed++) {

      // find atoms within slab boundaries lo/hi using <= and >=
      // check atoms between nfirst and nlast
      //   for first swaps in a dim, check owned and ghost
      //   for later swaps in a dim, only check newly arrived ghosts
      // store sent atom indices in list for use in future timesteps

      x = atom->x;
      if (style == SINGLE) {
        lo = slablo[iswap];
        hi = slabhi[iswap];
      } else {
        type = atom->type;
        mlo = multilo[iswap];
        mhi = multihi[iswap];
      }
      if (ineed % 2 == 0) {
        nfirst = nlast;
        nlast = atom->nlocal + atom->nghost;
      }

      nsend = 0;

      // sendflag = 0 if I do not send on this swap
      // sendneed test indicates receiver no longer requires data
      // e.g. due to non-PBC or non-uniform sub-domains

      if (ineed/2 >= sendneed[dim][ineed % 2]) sendflag = 0;
      else sendflag = 1;

      // find send atoms according to SINGLE vs MULTI
      // all atoms eligible versus atoms in bordergroup
      // only need to limit loop to bordergroup for first sends (ineed < 2)
      // on these sends, break loop in two: owned (in group) and ghost

      if (sendflag) {
        if (!bordergroup || ineed >= 2) {
          if (style == SINGLE) {
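            // single-cutoff case runs on the device: the functor scans
            // atoms [nfirst,nlast) against lo/hi, records matches in
            // k_sendlist, and tallies the total in total_send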
            typename ArrayTypes<DeviceType>::tdual_int_1d total_send("TS",1);
            total_send.h_view(0) = 0;
            if(exec_space == Device) {
              // counter was zeroed on the host; push that value to the device
              total_send.template modify<LMPHostType>();
              total_send.template sync<LMPDeviceType>();
            }
            BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
            Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
            Kokkos::parallel_for(config,f);
            DeviceType::fence();
            total_send.template modify<DeviceType>();
            total_send.template sync<LMPHostType>();

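            // if the tally exceeded the list capacity, grow the send
            // list and repeat the pass with the counter zeroed again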
            if(total_send.h_view(0) >= maxsendlist[iswap]) {
              grow_list(iswap,total_send.h_view(0));
              total_send.h_view(0) = 0;
              if(exec_space == Device) {
                total_send.template modify<LMPHostType>();
                total_send.template sync<LMPDeviceType>();
              }
              BuildBorderListFunctor<DeviceType> f(atomKK->k_x,k_sendlist,
                  total_send,nfirst,nlast,dim,lo,hi,iswap,maxsendlist[iswap]);
              Kokkos::ParallelWorkRequest config((nlast-nfirst+127)/128,128);
              Kokkos::parallel_for(config,f);
              DeviceType::fence();
              total_send.template modify<DeviceType>();
              total_send.template sync<LMPHostType>();
            }
            nsend = total_send.h_view(0);
          } else {
            error->all(FLERR,"Required border comm not yet "
                       "implemented with Kokkos\n");
            for (i = nfirst; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }

        } else {
          error->all(FLERR,"Required border comm not yet "
                     "implemented with Kokkos\n");
          if (style == SINGLE) {
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            for (i = atom->nlocal; i < nlast; i++)
              if (x[i][dim] >= lo && x[i][dim] <= hi) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
          } else {
            ngroup = atom->nfirst;
            for (i = 0; i < ngroup; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
            for (i = atom->nlocal; i < nlast; i++) {
              itype = type[i];
              if (x[i][dim] >= mlo[itype] && x[i][dim] <= mhi[itype]) {
                if (nsend == maxsendlist[iswap]) grow_list(iswap,nsend);
                sendlist[iswap][nsend++] = i;
              }
            }
          }
        }
      }

      // pack up list of border atoms

      if (nsend*size_border > maxsend)
        grow_send_kokkos(nsend*size_border,0);
      if (ghost_velocity) {
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        n = avec->pack_border_vel(nsend,sendlist[iswap],buf_send,
                                  pbc_flag[iswap],pbc[iswap]);
      }
      else
        n = avec->pack_border_kokkos(nsend,k_sendlist,k_buf_send,iswap,
                                     pbc_flag[iswap],pbc[iswap],exec_space);

      // swap atoms with other proc
      // no MPI calls except SendRecv if nsend/nrecv = 0
      // put incoming ghosts at end of my atom arrays
      // if swapping with self, simply copy, no messages

      if (sendproc[iswap] != me) {
        MPI_Sendrecv(&nsend,1,MPI_INT,sendproc[iswap],0,
                     &nrecv,1,MPI_INT,recvproc[iswap],0,world,&status);
        if (nrecv*size_border > maxrecv) grow_recv_kokkos(nrecv*size_border);
        if (nrecv) MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),
                             nrecv*size_border,MPI_DOUBLE,
                             recvproc[iswap],0,world,&request);
        if (n) MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),n,
                        MPI_DOUBLE,sendproc[iswap],0,world);
        if (nrecv) MPI_Wait(&request,&status);
        buf = buf_recv;
      } else {
        nrecv = nsend;
        buf = buf_send;
      }

      // unpack buffer

      if (ghost_velocity) {
        error->all(FLERR,"Required border comm not yet "
                   "implemented with Kokkos\n");
        avec->unpack_border_vel(nrecv,atom->nlocal+atom->nghost,buf);
      }
      else
        if (sendproc[iswap] != me)
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_recv,exec_space);
        else
          avec->unpack_border_kokkos(nrecv,atom->nlocal+atom->nghost,
                                     k_buf_send,exec_space);

      // set all pointers & counters

      smax = MAX(smax,nsend);
      rmax = MAX(rmax,nrecv);
      sendnum[iswap] = nsend;
      recvnum[iswap] = nrecv;
      size_forward_recv[iswap] = nrecv*size_forward;
      size_reverse_send[iswap] = nrecv*size_reverse;
      size_reverse_recv[iswap] = nsend*size_reverse;
      firstrecv[iswap] = atom->nlocal + atom->nghost;
      atom->nghost += nrecv;
      iswap++;
    }
  }

  // ensure send/recv buffers are long enough for all forward & reverse comm

  int max = MAX(maxforward*smax,maxreverse*rmax);
  if (max > maxsend) grow_send_kokkos(max,0);
  max = MAX(maxforward*rmax,maxreverse*smax);
  if (max > maxrecv) grow_recv_kokkos(max);

  // reset global->local map

  if (map_style) atom->map_set();
  if (exec_space == Host) k_sendlist.sync<LMPDeviceType>();
  atomKK->modified(exec_space,ALL_MASK);
  DeviceType::fence();
}
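
The overflow handling above (launch the functor once, compare total_send against maxsendlist, grow the list, and launch again) is a common count-and-retry idiom for building variable-length lists on a device. Below is a minimal self-contained sketch of that idiom using the modern Kokkos API rather than the legacy ParallelWorkRequest interface in the snippet; the function name build_list, the rank-0 counter, and the [lo,hi] membership test are illustrative assumptions, not part of the LAMMPS source.

#include <Kokkos_Core.hpp>

// Hypothetical sketch of the count-and-retry idiom: collect the indices
// of values inside [lo,hi] into a fixed-capacity list. An atomic counter
// tallies every match; matches beyond the capacity are counted but not
// stored, so the caller can grow the list and repeat the pass.
int build_list(Kokkos::View<const double*> x, double lo, double hi,
               Kokkos::View<int*> list)
{
  Kokkos::View<int> count("count");              // device-resident counter
  const int capacity = static_cast<int>(list.extent(0));
  Kokkos::parallel_for("build_list", x.extent(0), KOKKOS_LAMBDA(int i) {
    if (x(i) >= lo && x(i) <= hi) {
      int k = Kokkos::atomic_fetch_add(&count(), 1);
      if (k < capacity) list(k) = i;             // store only while room remains
    }
  });
  int total = 0;
  Kokkos::deep_copy(total, count);               // copy the tally to the host
  return total;                                  // may exceed capacity: caller retries
}

A caller would compare the returned total against list.extent(0), reallocate the list when the first pass overflowed, and run build_list once more; that is exactly the structure of the two BuildBorderListFunctor launches in borders_device().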