Example #1
0
void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data)
{
  int initialized;
  MPI_CHECK( MPI_Initialized(&initialized) );

  if (!initialized) {
    errorQuda("MPI has not been initialized");
  }

  MPI_CHECK( MPI_Comm_rank(MPI_COMM_WORLD, &rank) );
  MPI_CHECK( MPI_Comm_size(MPI_COMM_WORLD, &size) );

  int grid_size = 1;
  for (int i = 0; i < ndim; i++) {
    grid_size *= dims[i];
  }
  if (grid_size != size) {
    errorQuda("Communication grid size declared via initCommsGridQuda() does not match"
              " total number of MPI ranks (%d != %d)", grid_size, size);
  }

  Topology *topo = comm_create_topology(ndim, dims, rank_from_coords, map_data);
  comm_set_default_topology(topo);

  // determine which GPU this MPI rank will use
  char *hostname = comm_hostname();
  char *hostname_recv_buf = (char *)safe_malloc(128*size);
  
  MPI_CHECK( MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD) );

  gpuid = 0;
  for (int i = 0; i < rank; i++) {
    if (!strncmp(hostname, &hostname_recv_buf[128*i], 128)) {
      gpuid++;
    }
  }
  host_free(hostname_recv_buf);

  int device_count;
  cudaGetDeviceCount(&device_count);
  if (device_count == 0) {
    errorQuda("No CUDA devices found");
  }
  if (gpuid >= device_count) {
    errorQuda("Too few GPUs available on %s", hostname);
  }
}
Example #2
0
/*
 * Work out this rank's coordinates in the 4-d (x,y,z,t) process grid and the
 * ranks of its forward/backward neighbours in each direction.
 *
 * Reads:  rank, size, {x,y,z,t}gridsize
 * Writes: {x,y,z,t}gridid, {x,y,z,t}_fwd_nbr, {x,y,z,t}_back_nbr
 *
 * The rank <-> coordinate mapping is selected at compile time by
 * X_FASTEST_DIM_NODE_RANKING: when defined, x varies fastest with rank;
 * otherwise t varies fastest. Neighbours wrap around periodically.
 *
 * Note that GRID_ID is #define'd inside this function and is not #undef'd,
 * so it remains visible to the rest of the translation unit.
 */
static void
comm_partition(void)
{
  /* Debug aid, left disabled:
  printf("xgridsize=%d\n", xgridsize);
  printf("ygridsize=%d\n", ygridsize);
  printf("zgridsize=%d\n", zgridsize);
  printf("tgridsize=%d\n", tgridsize);
  */

  // The grid must contain exactly one slot per MPI process.
  if (size != xgridsize*ygridsize*zgridsize*tgridsize) {
    if (rank == 0) {
      printf("ERROR: Invalid configuration (t,z,y,x gridsize=%d %d %d %d) "
             "but # of MPI processes is %d\n", tgridsize, zgridsize, ygridsize, xgridsize, size);
    }
    comm_exit(1);
  }

  // Peel the coordinates off the linear rank, slowest-varying dimension first.
  int rem;

#ifdef X_FASTEST_DIM_NODE_RANKING
  tgridid = rank/(zgridsize*ygridsize*xgridsize);
  rem     = rank%(zgridsize*ygridsize*xgridsize);
  zgridid = rem/(ygridsize*xgridsize);
  rem     = rem%(ygridsize*xgridsize);
  ygridid = rem/xgridsize;
  xgridid = rem%xgridsize;
#define GRID_ID(xid,yid,zid,tid) (tid*zgridsize*ygridsize*xgridsize+zid*ygridsize*xgridsize+yid*xgridsize+xid)
#else
  xgridid = rank/(ygridsize*zgridsize*tgridsize);
  rem     = rank%(ygridsize*zgridsize*tgridsize);
  ygridid = rem/(zgridsize*tgridsize);
  rem     = rem%(zgridsize*tgridsize);
  zgridid = rem/tgridsize;
  tgridid = rem%tgridsize;
#define GRID_ID(xid,yid,zid,tid) (xid*ygridsize*zgridsize*tgridsize+yid*zgridsize*tgridsize+zid*tgridsize+tid)
#endif

  if (getVerbosity() >= QUDA_DEBUG_VERBOSE)
    printf("My rank: %d, gridid(t,z,y,x): %d %d %d %d\n", rank, tgridid, zgridid, ygridid, xgridid);

  // This rank's own coordinates, held in plain locals so that GRID_ID is only
  // ever handed simple identifiers (its parameters are not parenthesized).
  const int x_here = xgridid;
  const int y_here = ygridid;
  const int z_here = zgridid;
  const int t_here = tgridid;

  // X direction neighbours (periodic wrap-around)
  const int x_next = (xgridid + 1) % xgridsize;
  x_fwd_nbr = GRID_ID(x_next, y_here, z_here, t_here);
  const int x_prev = (xgridid - 1 + xgridsize) % xgridsize;
  x_back_nbr = GRID_ID(x_prev, y_here, z_here, t_here);

  // Y direction neighbours
  const int y_next = (ygridid + 1) % ygridsize;
  y_fwd_nbr = GRID_ID(x_here, y_next, z_here, t_here);
  const int y_prev = (ygridid - 1 + ygridsize) % ygridsize;
  y_back_nbr = GRID_ID(x_here, y_prev, z_here, t_here);

  // Z direction neighbours
  const int z_next = (zgridid + 1) % zgridsize;
  z_fwd_nbr = GRID_ID(x_here, y_here, z_next, t_here);
  const int z_prev = (zgridid - 1 + zgridsize) % zgridsize;
  z_back_nbr = GRID_ID(x_here, y_here, z_prev, t_here);

  // T direction neighbours
  const int t_next = (tgridid + 1) % tgridsize;
  t_fwd_nbr = GRID_ID(x_here, y_here, z_here, t_next);
  const int t_prev = (tgridid - 1 + tgridsize) % tgridsize;
  t_back_nbr = GRID_ID(x_here, y_here, z_here, t_prev);

  // Optional per-rank dump of the neighbour table.
  if (getVerbosity() >= QUDA_DEBUG_VERBOSE) {
    printf("MPI rank: rank=%d, hostname=%s, x_fwd_nbr=%d, x_back_nbr=%d\n", rank, comm_hostname(), x_fwd_nbr, x_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, y_fwd_nbr=%d, y_back_nbr=%d\n", rank, comm_hostname(), y_fwd_nbr, y_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, z_fwd_nbr=%d, z_back_nbr=%d\n", rank, comm_hostname(), z_fwd_nbr, z_back_nbr);
    printf("MPI rank: rank=%d, hostname=%s, t_fwd_nbr=%d, t_back_nbr=%d\n", rank, comm_hostname(), t_fwd_nbr, t_back_nbr);
  }
}