/**
 * @brief Generates the 2D topology and establishes the neighbor relationships between MPI processes
 *
 * The function creates a non-periodic 2D Cartesian communicator over MPI_COMM_WORLD (with rank
 * reordering allowed), refreshes the caller's rank so that it refers to the new communicator,
 * and then queries the caller's 2D coordinates and its four direct neighbors.
 *
 * @param[in, out] rank      The rank of the calling MPI process; on success it holds the rank
 *                           within the Cartesian communicator
 * @param[in]      size      The total number of MPI processes available
 * @param[in]      topSize   The desired topology size (this must match the number of available MPI processes)
 * @param[out]     neighbors The list that will be populated with the direct neighbors of the calling MPI process
 *                           (indexed by DIR_LEFT, DIR_RIGHT, DIR_TOP and DIR_BOTTOM)
 * @param[out]     topIndex  The 2D index that the calling MPI process will have in the topology
 * @param[out]     cartComm  The Cartesian MPI communicator
 * @return STATUS_OK on success; STATUS_ERR if the process count does not match the topology size
 *         or if any MPI topology call reports a failure
 */
int ApplyTopology(int * rank, int size, const int2 * topSize, int * neighbors, int2 * topIndex, MPI_Comm * cartComm)
{
    int topologySize = topSize->x * topSize->y;
    int dimSize[2] = {topSize->x, topSize->y};
    int usePeriods[2] = {0, 0}, newCoords[2];
    int oldRank = * rank;
    int mpiStatus;

    // The number of MPI processes must fill the topology
    if (size != topologySize)
    {
        OneErrPrintf(* rank == MPI_MASTER_RANK, "Error: The number of MPI processes (%d) doesn't match "
            "the topology size (%d).\n", size, topologySize);

        return STATUS_ERR;
    }

    // Create a Cartesian communicator. The return codes only matter when the error handler
    // is not the default (fatal) one, but in that case they must not be silently dropped.
    mpiStatus = MPI_Cart_create(MPI_COMM_WORLD, 2, dimSize, usePeriods, 1, cartComm);

    if ((mpiStatus != MPI_SUCCESS) || (* cartComm == MPI_COMM_NULL))
    {
        OneErrPrintf(1, "Error: Could not create the Cartesian communicator (rank %d).\n", oldRank);

        return STATUS_ERR;
    }

    // Update the rank to be relevant to the new communicator
    mpiStatus = MPI_Comm_rank(* cartComm, rank);

    if (mpiStatus != MPI_SUCCESS)
    {
        OneErrPrintf(1, "Error: Could not query the rank in the Cartesian communicator (rank %d).\n", oldRank);

        return STATUS_ERR;
    }

    if ((* rank) != oldRank)
    {
        printf("Rank change: from %d to %d\n", oldRank, * rank);
    }

    // Obtain the 2D coordinates in the new communicator
    mpiStatus = MPI_Cart_coords(* cartComm, * rank, 2, newCoords);

    if (mpiStatus != MPI_SUCCESS)
    {
        OneErrPrintf(1, "Error: Could not query the topology coordinates (rank %d).\n", * rank);

        return STATUS_ERR;
    }

    * topIndex = make_int2(newCoords[0], newCoords[1]);

    // Obtain the direct neighbor ranks. The topology is non-periodic, so processes on the
    // border receive MPI_PROC_NULL for the neighbors that do not exist.
    mpiStatus = MPI_Cart_shift(* cartComm, 0, 1, neighbors + DIR_LEFT, neighbors + DIR_RIGHT);

    if (mpiStatus == MPI_SUCCESS)
    {
        mpiStatus = MPI_Cart_shift(* cartComm, 1, 1, neighbors + DIR_TOP, neighbors + DIR_BOTTOM);
    }

    if (mpiStatus != MPI_SUCCESS)
    {
        OneErrPrintf(1, "Error: Could not query the topology neighbors (rank %d).\n", * rank);

        return STATUS_ERR;
    }

    // Setting the device here will have effect only for the normal CUDA & MPI version
    SetDeviceAfterInit(* rank);

    return STATUS_OK;
}
/**
 * @brief Parses the application's command-line arguments
 *
 * Recognized arguments:
 *   -h, --help     print the usage text (master rank only) and stop
 *   -fs            enable fast block swap
 *   -t <x> [<y>]   topology size (mandatory; <y> defaults to 1)
 *   -d <x> [<y>]   local domain size (optional; <y> defaults to <x>)
 *
 * Any non-empty argument still left in argv after these have been processed is reported as
 * unknown. NOTE(review): this relies on FindAndClearArgument / ExtractNumber blanking the
 * entries they consume, and on ExtractNumber yielding a non-positive value for a missing or
 * non-numeric entry - confirm against their definitions.
 *
 * @param[in]  argc        The number of input arguments
 * @param[in]  argv        The input arguments
 * @param[in]  rank        The MPI rank of the calling process (only MPI_MASTER_RANK prints)
 * @param[in]  size        The total number of MPI processes available (not used by the parser itself)
 * @param[out] domSize     The parsed domain size (2D)
 * @param[out] topSize     The parsed topology size (2D)
 * @param[out] useFastSwap The parsed flag for fast block swap
 * @return The parsing status (STATUS_OK indicates a successful parse). STATUS_ERR is also
 *         returned when help is requested, so that the application does not continue.
 */
int ParseCommandLineArguments(int argc, char ** argv, int rank, int size, int2 * domSize, int2 * topSize, int * useFastSwap)
{
    int canPrint = (rank == MPI_MASTER_RANK);
    int argIdx;

    // If help is requested, all other arguments will be ignored
    if ((FindAndClearArgument("-h", argc, argv) != -1) || (FindAndClearArgument("--help", argc, argv) != -1))
    {
        if (canPrint)
        {
            PrintUsage(argv[0]);
        }

        // This simply prevents the application from continuing
        return STATUS_ERR;
    }

    // Check if fast swapping was requested
    * useFastSwap = (FindAndClearArgument("-fs", argc, argv) != -1);

    // Topology information must always be present
    argIdx = FindAndClearArgument("-t", argc, argv);

    if (argIdx == -1)
    {
        OneErrPrintf(canPrint, "Error: Could not find the topology information.\n");

        return STATUS_ERR;
    }
    else
    {
        topSize->x = ExtractNumber(argIdx + 1, argc, argv);
        topSize->y = ExtractNumber(argIdx + 2, argc, argv);

        // At least the first topology dimension must be specified
        if (topSize->x <= 0)
        {
            OneErrPrintf(canPrint, "Error: The topology size is invalid (first value: %d)\n", topSize->x);

            return STATUS_ERR;
        }

        // If the second topology dimension is missing, it will default to 1
        if (topSize->y <= 0)
        {
            topSize->y = 1;
        }
    }

    // The domain size information is optional
    argIdx = FindAndClearArgument("-d", argc, argv);

    if (argIdx == -1)
    {
        domSize->x = domSize->y = DEFAULT_DOMAIN_SIZE;
    }
    else
    {
        domSize->x = ExtractNumber(argIdx + 1, argc, argv);
        domSize->y = ExtractNumber(argIdx + 2, argc, argv);

        // At least the first domain dimension must be specified
        if (domSize->x < MIN_DOM_SIZE)
        {
            OneErrPrintf(canPrint, "Error: The local domain size must be at least %d (currently: %d)\n",
                MIN_DOM_SIZE, domSize->x);

            return STATUS_ERR;
        }

        if (domSize->y <= 0)
        {
            // If the second domain dimension is missing, it will default to the first dimension's value
            domSize->y = domSize->x;
        }
        else if (domSize->y < MIN_DOM_SIZE)
        {
            // An explicitly specified second dimension must respect the same lower bound as the
            // first one; previously a too-small value slipped through unchecked.
            OneErrPrintf(canPrint, "Error: The local domain size must be at least %d (currently: %d)\n",
                MIN_DOM_SIZE, domSize->y);

            return STATUS_ERR;
        }
    }

    // At the end, there should be no other arguments that haven't been parsed
    for (int i = 1; i < argc; ++i)
    {
        if (strlen(argv[i]) > 0)
        {
            OneErrPrintf(canPrint, "Error: Unknown argument (\"%s\")\n", argv[i]);

            return STATUS_ERR;
        }
    }

    // If we reach this point, all arguments were parsed successfully
    if (canPrint)
    {
        printf("Topology size: %d x %d\n", topSize->x, topSize->y);
        printf("Local domain size (current node): %d x %d\n", domSize->x, domSize->y);
        printf("Global domain size (all nodes): %d x %d\n", topSize->x * domSize->x, topSize->y * domSize->y);
    }

    return STATUS_OK;
}