void PrintParData(ParData data, const std::string& message) {
#ifndef RELEASE
    CallStackEntry entry("Wave3d::PrintParData");
#endif
    int mpirank = getMPIRank();
    if (mpirank == 0) {
        std::cout << message << std::endl
                  << "mean: " << data.mean << std::endl
                  << "var: "  << data.var  << std::endl
                  << "max: "  << data.max  << std::endl
                  << "min: "  << data.min  << std::endl;
    }
}
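/*
 * Hedged usage sketch (not from the original source): one way such a ParData
 * could be filled before calling PrintParData. It assumes ParData exposes the
 * four double fields printed above, that <mpi.h> is available as in the
 * surrounding code, and that a getMPISize() counterpart of getMPIRank()
 * exists; the helper name GatherParData is illustrative only.
 */
ParData GatherParData(double localval) {
    int mpisize = getMPISize();  // assumed counterpart of getMPIRank()
    double sum = 0.0, sumsq = 0.0, sq = localval * localval;
    ParData data;
    MPI_Reduce(&localval, &sum,      1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    MPI_Reduce(&localval, &data.max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
    MPI_Reduce(&localval, &data.min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
    MPI_Reduce(&sq,       &sumsq,    1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
    data.mean = sum / mpisize;
    data.var  = sumsq / mpisize - data.mean * data.mean;  // E[x^2] - E[x]^2
    return data;  // fields are meaningful on rank 0 only, matching the print guard
}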
void grid::NumaDistStaticGrid::getAt(void* buf, types::Type::converter_t converter,
        unsigned long x, unsigned long y, unsigned long z) {
    unsigned long block = getBlockByCoords(x, y, z);
    int remoteMPIRank = getBlockRank(block);
    pthread_t remoteThreadId = getThreadId(block);

    if (remoteMPIRank == getMPIRank()
            && pthread_equal(remoteThreadId, pthread_self())) {
        // Nice, this is a block for which we are the master thread.
        NumaLocalStaticGrid::getAt(buf, converter, x, y, z);
        return;
    }
    // The cache grid calls getBlock if the block has to be transferred first.
    NumaLocalCacheGrid::getAt(buf, converter, x, y, z);
}
/**
 * Transfer the block from the remote rank that holds it, or, if it is
 * held by another thread on the same rank, copy it with memcpy.
 */
void grid::NumaDistStaticGrid::getBlock(unsigned long block,
        long oldBlock, unsigned long cacheIndex, unsigned char *cache) {
    unsigned long blockSize = getTotalBlockSize();
    int remoteRank = getBlockRank(block);
    incCounter(perf::Counter::MPI);

    if (remoteRank == getMPIRank()) {
        // The block is located in the same NUMA domain, but in the memory
        // space of another thread: copy it directly.
        pthread_t remoteId = getThreadId(block);
        size_t offset = getType().getSize() * blockSize * getBlockThreadOffset(block);
        memcpy(cache,
               m_threadHandle.getStaticPtr(remoteId, m_id) + offset,
               getType().getSize() * blockSize);
    } else {
        // This section is critical: only one thread may access it at a time.
        // TODO: Find a better solution than a pthread mutex.
        unsigned long offset = getBlockOffset(block);
        int mpiResult;
        NDBG_UNUSED(mpiResult);
        mpiResult = m_threadHandle.getBlock(cache, blockSize, getType().getMPIType(),
                remoteRank, offset * blockSize, blockSize, getType().getMPIType(),
                m_threadHandle.mpiWindow);
        assert(mpiResult == MPI_SUCCESS);
    }
}
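/*
 * Hedged sketch (assumption, not the project's actual wrapper): the
 * m_threadHandle.getBlock() call above is consistent with a passive-target
 * MPI one-sided get. Under that assumption, its body could look roughly like
 * this; the parameter names mirror the call site, and the MPI_Win is the
 * mpiWindow member passed in above.
 */
int getBlockSketch(void *cache, int count, MPI_Datatype type,
                   int remoteRank, MPI_Aint targetDisp,
                   int targetCount, MPI_Datatype targetType,
                   MPI_Win win) {
    // Lock only the target rank, so concurrent gets to other ranks can proceed.
    MPI_Win_lock(MPI_LOCK_SHARED, remoteRank, 0, win);
    int result = MPI_Get(cache, count, type,
                         remoteRank, targetDisp, targetCount, targetType, win);
    // The data is only guaranteed to have arrived in `cache` after the unlock.
    MPI_Win_unlock(remoteRank, win);
    return result;
}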
void grid::NumaLocalStaticGrid::getAt(void* buf, types::Type::converter_t converter,
        unsigned long x, unsigned long y, unsigned long z) {
    unsigned long blockSize = getTotalBlockSize();
    unsigned long block = getBlockByCoords(x, y, z);
    int remoteMPIRank = getBlockRank(block);
    pthread_t remoteThreadId = getThreadId(block);
    NDBG_UNUSED(remoteMPIRank);
    unsigned long offset = getBlockThreadOffset(block);

    // Offset inside the block
    x %= getBlockSize(0);
    y %= getBlockSize(1);
    z %= getBlockSize(2);

    assert(remoteMPIRank == getMPIRank());
    assert(pthread_equal(remoteThreadId, pthread_self()));

    if (pthread_equal(remoteThreadId, pthread_self())) {
        (getType().*converter)(&m_data[getType().getSize() *
                (blockSize * offset                               // jump to the correct block
                 + (z * getBlockSize(1) + y) * getBlockSize(0) + x)], // correct value inside the block
                buf);
    }
}
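/*
 * Worked example of the indexing above (illustrative numbers, not from the
 * source): with 4x4x4 blocks, getBlockSize(0..2) == 4 and blockSize == 64.
 * For local coordinates (x, y, z) = (1, 2, 3) inside the block at thread
 * offset 2, the element index is
 *     64 * 2 + (3 * 4 + 2) * 4 + 1 = 128 + 57 = 185,
 * and the byte address is m_data + getType().getSize() * 185. So x varies
 * fastest and z slowest, i.e. each block is stored in x-major order.
 */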
string WMUtils::makeFileName(bool makeNew) {
    /* Retrieve the rank */
    int rank = getMPIRank();

    /* On rank 0, make a folder name based on the root directory */
    string folder(WMTRACEDIR);
    if (rank == 0) {
        if (makeNew)
            folder.assign(makeUniqueFolder(folder));
        else
            folder.assign(findUniqueFolder(folder));
    }

    /* Broadcast the folder name to all ranks */
    char folder_str[100];
    snprintf(folder_str, sizeof(folder_str), "%s", folder.c_str());

    /* Only perform the broadcast if we are using MPI */
#ifndef NO_MPI
    MPI_Bcast(folder_str, 100, MPI_CHAR, 0, MPI_COMM_WORLD);
#endif
    return stichFileName(folder_str, rank);
}
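/*
 * Hedged sketch (assumption, not part of WMUtils): the broadcast above relies
 * on every rank passing the same fixed 100-byte buffer, which silently
 * truncates longer folder names. A length-prefixed variant avoids that;
 * BcastString is an illustrative helper and requires <vector>, <algorithm>,
 * and <mpi.h>.
 */
std::string BcastString(const std::string& s, int root) {
    int len = static_cast<int>(s.size());
    MPI_Bcast(&len, 1, MPI_INT, root, MPI_COMM_WORLD);           // size first
    std::vector<char> buf(len);
    if (getMPIRank() == root)
        std::copy(s.begin(), s.end(), buf.begin());
    MPI_Bcast(buf.data(), len, MPI_CHAR, root, MPI_COMM_WORLD);  // then payload
    return std::string(buf.begin(), buf.end());
}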
//---------------------------------------------------------------------
int Wave3d::check(ParVec<int, cpx, PtPrtn>& den, ParVec<int, cpx, PtPrtn>& val,
                  IntNumVec& chkkeys, double& relerr) {
#ifndef RELEASE
    CallStackEntry entry("Wave3d::check");
#endif
    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
    _self = this;
    int mpirank = getMPIRank();
    ParVec<int, Point3, PtPrtn>& pos = (*_posptr);

    // 1. Get the positions of the check keys.
    std::vector<int> all(1, 1);
    std::vector<int> chkkeyvec;
    for (int i = 0; i < chkkeys.m(); ++i) {
        chkkeyvec.push_back(chkkeys(i));
    }
    pos.getBegin(chkkeyvec, all);
    pos.getEnd(all);

    std::vector<Point3> tmpsrcpos;
    for (std::map<int, Point3>::iterator mi = pos.lclmap().begin();
         mi != pos.lclmap().end(); ++mi) {
        if (pos.prtn().owner(mi->first) == mpirank) {
            tmpsrcpos.push_back(mi->second);
        }
    }
    std::vector<cpx> tmpsrcden;
    for (std::map<int, cpx>::iterator mi = den.lclmap().begin();
         mi != den.lclmap().end(); ++mi) {
        if (den.prtn().owner(mi->first) == mpirank) {
            tmpsrcden.push_back(mi->second);
        }
    }
    std::vector<Point3> tmptrgpos;
    for (size_t i = 0; i < chkkeyvec.size(); ++i) {
        tmptrgpos.push_back(pos.access(chkkeyvec[i]));
    }

    DblNumMat srcpos(3, tmpsrcpos.size(), false, (double*)&(tmpsrcpos[0]));
    CpxNumVec srcden(tmpsrcden.size(), false, (cpx*)&(tmpsrcden[0]));
    DblNumMat trgpos(3, tmptrgpos.size(), false, (double*)&(tmptrgpos[0]));
    CpxNumVec trgval(tmptrgpos.size());
    CpxNumMat inter;
    SAFE_FUNC_EVAL( _kernel.kernel(trgpos, srcpos, srcpos, inter) );
    // If no points were assigned to this processor, then trgval should be zero.
    if (inter.n() != 0) {
        SAFE_FUNC_EVAL( zgemv(1.0, inter, srcden, 0.0, trgval) );
    } else {
        for (int i = 0; i < trgval.m(); ++i) {
            trgval(i) = 0;
        }
    }

    CpxNumVec allval(trgval.m());
    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
    // Note: 2 doubles per complex number
    SAFE_FUNC_EVAL( MPI_Allreduce(trgval.data(), allval.data(), trgval.m() * 2,
                                  MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD) );

    // 2. Get the true values and compute the relative error.
    val.getBegin(chkkeyvec, all);
    val.getEnd(all);
    CpxNumVec truval(chkkeyvec.size());
    for (size_t i = 0; i < chkkeyvec.size(); ++i) {
        truval(i) = val.access(chkkeyvec[i]);
    }
    CpxNumVec errval(chkkeyvec.size());
    for (size_t i = 0; i < chkkeyvec.size(); ++i) {
        errval(i) = allval(i) - truval(i);
    }
    double tn = sqrt(energy(truval));
    double en = sqrt(energy(errval));
    relerr = en / tn;

    SAFE_FUNC_EVAL( MPI_Barrier(MPI_COMM_WORLD) );
    return 0;
}
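/*
 * Hedged sketch (assumption): the energy() helper used above behaves like
 * the squared 2-norm of a complex vector, so that
 *     relerr = ||allval - truval||_2 / ||truval||_2.
 * A minimal version consistent with that reading, assuming cpx is
 * std::complex<double> and <complex> is available:
 */
double energy(const CpxNumVec& v) {
    double total = 0.0;
    for (int i = 0; i < v.m(); ++i) {
        total += std::norm(v(i));  // |v_i|^2 = re^2 + im^2
    }
    return total;
}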