Code Example #1
File: comm_kokkos.cpp  Project: ganzenmg/lammps
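Kokkos device version of the atom exchange: atoms that have moved outside the local sub-domain are packed into device buffers, exchanged with the neighboring processors in each dimension, and unpacked on the receiving side.
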
void CommKokkos::exchange_device()
{
  int i,m,nsend,nrecv,nrecv1,nrecv2,nlocal;
  double lo,hi,value;
  double **x;
  double *sublo,*subhi,*buf;
  MPI_Request request;
  MPI_Status status;
  AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;

  // clear global->local map for owned and ghost atoms
  // b/c atoms migrate to new procs in exchange() and
  //   new ghosts are created in borders()
  // map_set() is done at end of borders()
  // clear ghost count and any ghost bonus data internal to AtomVec

  if (map_style) atom->map_clear();
  atom->nghost = 0;
  atom->avec->clear_bonus();

  // subbox bounds for orthogonal or triclinic

  if (triclinic == 0) {
    sublo = domain->sublo;
    subhi = domain->subhi;
  } else {
    sublo = domain->sublo_lamda;
    subhi = domain->subhi_lamda;
  }

  // sync all atom data to the execution space before packing on the device
  atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);

  // loop over dimensions
  for (int dim = 0; dim < 3; dim++) {

    // fill buffer with atoms leaving my box, using < and >=
    // when atom is deleted, fill it in with last atom

    x = atom->x;
    lo = sublo[dim];
    hi = subhi[dim];
    nlocal = atom->nlocal;
    i = nsend = 0;

    if (true) {
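      // device path: flag departing atoms and gather their indices into
      // k_exchange_sendlist with a parallel_for; k_count is primed to the
      // list capacity so the loop below runs at least once, and the list
      // is grown by ~10% and the pass repeated if the count overflows it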
      if (k_sendflag.h_view.dimension_0()<nlocal) k_sendflag.resize(nlocal);
      k_count.h_view(0) = k_exchange_sendlist.h_view.dimension_0();
      while (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
        k_count.h_view(0) = 0;
        k_count.modify<LMPHostType>();
        k_count.sync<DeviceType>();

        BuildExchangeListFunctor<DeviceType> 
          f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
            nlocal,dim,lo,hi);
        Kokkos::parallel_for(nlocal,f);
        DeviceType::fence();
        k_exchange_sendlist.modify<DeviceType>();
        k_sendflag.modify<DeviceType>();
        k_count.modify<DeviceType>();

        k_count.sync<LMPHostType>();
        if (k_count.h_view(0)>=k_exchange_sendlist.h_view.dimension_0()) {
          k_exchange_sendlist.resize(k_count.h_view(0)*1.1);
          k_exchange_copylist.resize(k_count.h_view(0)*1.1);
          k_count.h_view(0)=k_exchange_sendlist.h_view.dimension_0();
        }
      }
      k_exchange_sendlist.sync<LMPHostType>();
      k_sendflag.sync<LMPHostType>();

      // build the copylist: for each departing atom whose slot lies below the
      // new nlocal, pick a surviving atom from the end of the list to move
      // into that slot; -1 means no backfill is needed
      int sendpos = nlocal-1;
      nlocal -= k_count.h_view(0);
      for (int i = 0; i < k_count.h_view(0); i++) {
        if (k_exchange_sendlist.h_view(i)<nlocal) {
          while (k_sendflag.h_view(sendpos)) sendpos--;
          k_exchange_copylist.h_view(i) = sendpos;
          sendpos--;
        } else
          k_exchange_copylist.h_view(i) = -1;
      }

      k_exchange_copylist.modify<LMPHostType>();
      k_exchange_copylist.sync<DeviceType>();
      nsend = 
        avec->pack_exchange_kokkos(k_count.h_view(0),k_buf_send,
                                   k_exchange_sendlist,k_exchange_copylist,
                                   ExecutionSpaceFromDevice<DeviceType>::
                                   space,dim,lo,hi);
      DeviceType::fence();

    } else {
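      // host fallback: pack departing atoms one at a time, backfilling each
      // vacated slot with the last local atom (unreachable while the branch
      // above is hard-coded to true)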
      while (i < nlocal) {
        if (x[i][dim] < lo || x[i][dim] >= hi) {
          if (nsend > maxsend) grow_send_kokkos(nsend,1);
          nsend += avec->pack_exchange(i,&buf_send[nsend]);
          avec->copy(nlocal-1,i,1);
          nlocal--;
        } else i++;
      }
    }
    atom->nlocal = nlocal;

    // send/recv atoms in both directions
    // if 1 proc in dimension, no send/recv, set recv buf to send buf
    // if 2 procs in dimension, single send/recv
    // if more than 2 procs in dimension, send/recv to both neighbors

    if (procgrid[dim] == 1) {
      nrecv = nsend;
      buf = buf_send;
      if (nrecv) {
        atom->nlocal = avec->
          unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
                                 ExecutionSpaceFromDevice<DeviceType>::space);
        DeviceType::fence();
      }
    } else {
      MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
                   &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,&status);
      nrecv = nrecv1;
      if (procgrid[dim] > 2) {
        MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
                     &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,&status);
        nrecv += nrecv2;
      }
      if (nrecv > maxrecv) grow_recv_kokkos(nrecv);

      MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device(),nrecv1,
                MPI_DOUBLE,procneigh[dim][1],0,
                world,&request);
      MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
               MPI_DOUBLE,procneigh[dim][0],0,world);
      MPI_Wait(&request,&status);

      if (procgrid[dim] > 2) {
        MPI_Irecv(k_buf_recv.view<DeviceType>().ptr_on_device()+nrecv1,
                  nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
                  world,&request);
        MPI_Send(k_buf_send.view<DeviceType>().ptr_on_device(),nsend,
                 MPI_DOUBLE,procneigh[dim][1],0,world);
        MPI_Wait(&request,&status);
      }

      buf = buf_recv;
      if (nrecv) {
        atom->nlocal = avec->
          unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
                                 ExecutionSpaceFromDevice<DeviceType>::space);
        DeviceType::fence();
      }
    }

    // incoming atoms are checked against my sub-box and added to my list
    // inside unpack_exchange_kokkos() above

  }

  // mark all atom data as modified on the execution space after the exchange
  atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);

  if (atom->firstgroupname) {
    /* this is not yet implemented with Kokkos */
    atomKK->sync(Host,ALL_MASK);
    atom->first_reorder();
    atomKK->modified(Host,ALL_MASK);
  }
}