void comm_init(int ndim, const int *dims, QudaCommsMap rank_from_coords, void *map_data) { int initialized; MPI_CHECK( MPI_Initialized(&initialized) ); if (!initialized) { errorQuda("MPI has not been initialized"); } MPI_CHECK( MPI_Comm_rank(MPI_COMM_WORLD, &rank) ); MPI_CHECK( MPI_Comm_size(MPI_COMM_WORLD, &size) ); int grid_size = 1; for (int i = 0; i < ndim; i++) { grid_size *= dims[i]; } if (grid_size != size) { errorQuda("Communication grid size declared via initCommsGridQuda() does not match" " total number of MPI ranks (%d != %d)", grid_size, size); } Topology *topo = comm_create_topology(ndim, dims, rank_from_coords, map_data); comm_set_default_topology(topo); // determine which GPU this MPI rank will use char *hostname = comm_hostname(); char *hostname_recv_buf = (char *)safe_malloc(128*size); MPI_CHECK( MPI_Allgather(hostname, 128, MPI_CHAR, hostname_recv_buf, 128, MPI_CHAR, MPI_COMM_WORLD) ); gpuid = 0; for (int i = 0; i < rank; i++) { if (!strncmp(hostname, &hostname_recv_buf[128*i], 128)) { gpuid++; } } host_free(hostname_recv_buf); int device_count; cudaGetDeviceCount(&device_count); if (device_count == 0) { errorQuda("No CUDA devices found"); } if (gpuid >= device_count) { errorQuda("Too few GPUs available on %s", hostname); } }
static void comm_partition(void) { /* printf("xgridsize=%d\n", xgridsize); printf("ygridsize=%d\n", ygridsize); printf("zgridsize=%d\n", zgridsize); printf("tgridsize=%d\n", tgridsize); */ if(xgridsize*ygridsize*zgridsize*tgridsize != size){ if (rank ==0){ printf("ERROR: Invalid configuration (t,z,y,x gridsize=%d %d %d %d) " "but # of MPI processes is %d\n", tgridsize, zgridsize, ygridsize, xgridsize, size); } comm_exit(1); } int leftover; #ifdef X_FASTEST_DIM_NODE_RANKING tgridid = rank/(zgridsize*ygridsize*xgridsize); leftover = rank%(zgridsize*ygridsize*xgridsize); zgridid = leftover/(ygridsize*xgridsize); leftover = leftover%(ygridsize*xgridsize); ygridid = leftover/xgridsize; xgridid = leftover%xgridsize; #define GRID_ID(xid,yid,zid,tid) (tid*zgridsize*ygridsize*xgridsize+zid*ygridsize*xgridsize+yid*xgridsize+xid) #else xgridid = rank/(ygridsize*zgridsize*tgridsize); leftover = rank%(ygridsize*zgridsize*tgridsize); ygridid = leftover/(zgridsize*tgridsize); leftover = leftover%(zgridsize*tgridsize); zgridid = leftover/tgridsize; tgridid = leftover%tgridsize; #define GRID_ID(xid,yid,zid,tid) (xid*ygridsize*zgridsize*tgridsize+yid*zgridsize*tgridsize+zid*tgridsize+tid) #endif if (getVerbosity() >= QUDA_DEBUG_VERBOSE) printf("My rank: %d, gridid(t,z,y,x): %d %d %d %d\n", rank, tgridid, zgridid, ygridid, xgridid); int xid, yid, zid, tid; //X direction neighbors yid =ygridid; zid =zgridid; tid =tgridid; xid=(xgridid +1)%xgridsize; x_fwd_nbr = GRID_ID(xid,yid,zid,tid); xid=(xgridid -1+xgridsize)%xgridsize; x_back_nbr = GRID_ID(xid,yid,zid,tid); //Y direction neighbors xid =xgridid; zid =zgridid; tid =tgridid; yid =(ygridid+1)%ygridsize; y_fwd_nbr = GRID_ID(xid,yid,zid,tid); yid=(ygridid -1+ygridsize)%ygridsize; y_back_nbr = GRID_ID(xid,yid,zid,tid); //Z direction neighbors xid =xgridid; yid =ygridid; tid =tgridid; zid =(zgridid+1)%zgridsize; z_fwd_nbr = GRID_ID(xid,yid,zid,tid); zid=(zgridid -1+zgridsize)%zgridsize; z_back_nbr = GRID_ID(xid,yid,zid,tid); //T direction neighbors xid =xgridid; yid =ygridid; zid =zgridid; tid =(tgridid+1)%tgridsize; t_fwd_nbr = GRID_ID(xid,yid,zid,tid); tid=(tgridid -1+tgridsize)%tgridsize; t_back_nbr = GRID_ID(xid,yid,zid,tid); if (getVerbosity() >= QUDA_DEBUG_VERBOSE) { printf("MPI rank: rank=%d, hostname=%s, x_fwd_nbr=%d, x_back_nbr=%d\n", rank, comm_hostname(), x_fwd_nbr, x_back_nbr); printf("MPI rank: rank=%d, hostname=%s, y_fwd_nbr=%d, y_back_nbr=%d\n", rank, comm_hostname(), y_fwd_nbr, y_back_nbr); printf("MPI rank: rank=%d, hostname=%s, z_fwd_nbr=%d, z_back_nbr=%d\n", rank, comm_hostname(), z_fwd_nbr, z_back_nbr); printf("MPI rank: rank=%d, hostname=%s, t_fwd_nbr=%d, t_back_nbr=%d\n", rank, comm_hostname(), t_fwd_nbr, t_back_nbr); } }