/**
 * Transfer the block from the remote rank that holds it, or,
 * if it is the same rank, copy it directly with memcpy.
 */
void grid::NumaDistStaticGrid::getBlock(unsigned long block,
    long oldBlock,
    unsigned long cacheIndex,
    unsigned char* cache)
{
    unsigned long blockSize = getTotalBlockSize();
    int remoteRank = getBlockRank(block);
    incCounter(perf::Counter::MPI);

    int mpiResult; NDBG_UNUSED(mpiResult);

    if (remoteRank == getMPIRank()) {
        // The block is located in the same NUMA domain,
        // but in the memory space of another thread.
        pthread_t remoteId = getThreadId(block);
        size_t offset = getType().getSize() * blockSize * getBlockThreadOffset(block);

        // Copy the block from the other thread's static storage
        memcpy(cache,
            m_threadHandle.getStaticPtr(remoteId, m_id) + offset,
            getType().getSize() * blockSize);
    } else {
        // This section is critical; only one thread may access it at a time.
        // TODO: Find a better solution than a pthread mutex.
        unsigned long offset = getBlockOffset(block);

        mpiResult = m_threadHandle.getBlock(cache,
            blockSize,
            getType().getMPIType(),
            remoteRank,
            offset * blockSize,
            blockSize,
            getType().getMPIType(),
            m_threadHandle.mpiWindow);
        assert(mpiResult == MPI_SUCCESS);
    }
}
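/*
 * Sketch (not part of ASAGI): the remote branch above delegates to
 * m_threadHandle.getBlock(), which presumably wraps a one-sided MPI "get"
 * on the exposed window. A minimal, self-contained version of that pattern
 * with plain MPI calls could look like the following; the name
 * fetchRemoteBlock and the shared-lock strategy are illustrative
 * assumptions, not the actual ASAGI API.
 */
#include <mpi.h>
#include <cassert>

static int fetchRemoteBlock(unsigned char* cache, int blockSize,
    MPI_Datatype type, int remoteRank, MPI_Aint elemOffset, MPI_Win win)
{
    // Passive-target access: lock the remote window, copy the block, unlock
    int result = MPI_Win_lock(MPI_LOCK_SHARED, remoteRank, 0, win);
    assert(result == MPI_SUCCESS);

    result = MPI_Get(cache, blockSize, type,
        remoteRank, elemOffset, blockSize, type, win);
    assert(result == MPI_SUCCESS);

    // The data is only guaranteed to be in "cache" after the unlock returns
    return MPI_Win_unlock(remoteRank, win);
}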
/**
 * Writes the value at grid position (x, y, z) into buf, converted with the
 * given converter. The block containing the value must be stored locally
 * by the calling thread.
 */
void grid::NumaLocalStaticGrid::getAt(void* buf, types::Type::converter_t converter,
    unsigned long x, unsigned long y, unsigned long z)
{
    unsigned long blockSize = getTotalBlockSize();
    unsigned long block = getBlockByCoords(x, y, z);
    int remoteMPIRank = getBlockRank(block);
    NDBG_UNUSED(remoteMPIRank);
    pthread_t remoteThreadId = getThreadId(block);
    unsigned long offset = getBlockThreadOffset(block);

    // Offset inside the block
    x %= getBlockSize(0);
    y %= getBlockSize(1);
    z %= getBlockSize(2);

    assert(remoteMPIRank == getMPIRank());
    assert(pthread_equal(remoteThreadId, pthread_self()));

    if (pthread_equal(remoteThreadId, pthread_self())) {
        (getType().*converter)(&m_data[getType().getSize() *
            (blockSize * offset                                 // jump to the correct block
            + (z * getBlockSize(1) + y) * getBlockSize(0) + x)  // position inside the block
            ], buf);
    }
}
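/*
 * Sketch (illustrative only): the index expression in getAt() above
 * linearizes the local (x, y, z) coordinates in row-major order with x as
 * the fastest-running dimension. The helper below restates that layout;
 * the name cellIndex and the example block dimensions 4 x 3 x 2 are
 * hypothetical.
 */
static unsigned long cellIndex(unsigned long x, unsigned long y, unsigned long z,
    unsigned long dimX, unsigned long dimY)
{
    // Same layout as getAt(): (z * dimY + y) * dimX + x
    return (z * dimY + y) * dimX + x;
}

// Example: in a 4 x 3 x 2 block, element (x=1, y=2, z=1) maps to
// (1 * 3 + 2) * 4 + 1 = 21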
/**
 * Determines the rank of this process among all processes of the given
 * communicator that run on the same node.
 */
int asagi::Grid::nodeLocalRank(MPI_Comm comm)
{
    // The main idea for this function is taken from:
    // https://blogs.fau.de/wittmann/2013/02/mpi-node-local-rank-determination/
    // http://git.rrze.uni-erlangen.de/gitweb/?p=apsm.git;a=blob;f=MpiNodeRank.cpp;hb=HEAD
    int mpiResult; NDBG_UNUSED(mpiResult);

    typedef char procName_t[MPI_MAX_PROCESSOR_NAME+1];

    // Get the processor name
    procName_t procName;
    int procNameLength;
    mpiResult = MPI_Get_processor_name(procName, &procNameLength);
    assert(mpiResult == MPI_SUCCESS);
    assert(procNameLength <= MPI_MAX_PROCESSOR_NAME);
    procName[procNameLength] = '\0';

    // Compute the Adler-32 hash of the processor name
    const uint8_t* buffer = reinterpret_cast<const uint8_t*>(procName);
    uint32_t s1 = 1;
    uint32_t s2 = 0;
    for (int i = 0; i < procNameLength; i++) {
        s1 = (s1 + buffer[i]) % 65521;
        s2 = (s2 + s1) % 65521;
    }
    uint32_t hash = (s2 << 16) | s1;

    int rank;
    mpiResult = MPI_Comm_rank(comm, &rank);
    assert(mpiResult == MPI_SUCCESS);

    // Split the communicator by the hash; ranks on the same node get the
    // same color. MPI_Comm_split() requires a non-negative color, so the
    // most significant bit is masked off.
    MPI_Comm nodeComm;
    mpiResult = MPI_Comm_split(comm, static_cast<int>(hash & 0x7fffffffu), rank, &nodeComm);
    assert(mpiResult == MPI_SUCCESS);

    // Gather all processor names of this node to detect Adler-32 collisions
    int nodeSize;
    mpiResult = MPI_Comm_size(nodeComm, &nodeSize);
    assert(mpiResult == MPI_SUCCESS);

    procName_t* procNames = new procName_t[nodeSize];
    mpiResult = MPI_Allgather(procName, MPI_MAX_PROCESSOR_NAME+1, MPI_CHAR,
        procNames, MPI_MAX_PROCESSOR_NAME+1, MPI_CHAR, nodeComm);
    assert(mpiResult == MPI_SUCCESS);

    // procNames now contains the host names of all ranks in nodeComm,
    // ordered ascending by their rank.
    int nodeRank;
    int realNodeRank = 0;
    mpiResult = MPI_Comm_rank(nodeComm, &nodeRank);
    assert(mpiResult == MPI_SUCCESS);

    // Count only the preceding ranks whose host name really matches;
    // ranks that ended up in nodeComm through a hash collision are skipped.
    for (int i = 0; i < nodeRank; i++) {
        if (strcmp(procName, procNames[i]) == 0)
            realNodeRank++;
    }

    mpiResult = MPI_Comm_free(&nodeComm);
    assert(mpiResult == MPI_SUCCESS);

    delete [] procNames;

    return realNodeRank;
}
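/*
 * Alternative sketch (not used by the code above): since MPI-3,
 * MPI_Comm_split_type with MPI_COMM_TYPE_SHARED yields a node-local
 * communicator directly, without hashing host names or checking for hash
 * collisions. nodeLocalRank() above avoids this dependency on MPI-3; the
 * function name nodeLocalRankMpi3 is hypothetical.
 */
#include <mpi.h>
#include <cassert>

static int nodeLocalRankMpi3(MPI_Comm comm)
{
    int rank;
    int mpiResult = MPI_Comm_rank(comm, &rank);
    assert(mpiResult == MPI_SUCCESS);

    // Group processes that can share memory, i.e. processes on the same node
    MPI_Comm nodeComm;
    mpiResult = MPI_Comm_split_type(comm, MPI_COMM_TYPE_SHARED, rank,
        MPI_INFO_NULL, &nodeComm);
    assert(mpiResult == MPI_SUCCESS);

    int nodeRank;
    mpiResult = MPI_Comm_rank(nodeComm, &nodeRank);
    assert(mpiResult == MPI_SUCCESS);

    mpiResult = MPI_Comm_free(&nodeComm);
    assert(mpiResult == MPI_SUCCESS);

    return nodeRank;
}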