/**
 * Level-wise, workload-weighted static load balancing along a space-filling curve.
 *
 * All blocks are fetched in Hilbert order (if hilbert_ is set) or Morton order. Every refinement
 * level is then balanced independently: walking along the curve, process p keeps receiving blocks
 * until the workload it holds on this level reaches the not yet assigned workload of the level
 * divided by the number of processes that still have to be served.
 *
 * \param forest             forest whose blocks get a target process assigned
 * \param numberOfProcesses  number of processes the blocks are distributed to
 * \return the highest process rank that received a block on any level, plus one
 */
uint_t StaticLevelwiseCurveBalanceWeighted::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ )
{
   // TODO: take per process memory limit into account?

   std::vector< SetupBlock * > blocks;
   if( hilbert_ )
      forest.getHilbertOrder( blocks );
   else
      forest.getMortonOrder( blocks );

   uint_t usedProcesses( uint_t(0) );

   for( uint_t level = uint_t(0); level < forest.getNumberOfLevels(); ++level )
   {
      // blocks of the current level, still in curve order
      std::vector< SetupBlock * > blocksOnLevel;
      for( auto block = blocks.begin(); block != blocks.end(); ++block )
         if( (*block)->getLevel() == level )
            blocksOnLevel.push_back( *block );

      // workload of this level that has not been handed to a process yet
      workload_t totalWeight( 0 );
      for( auto block = blocksOnLevel.begin(); block != blocksOnLevel.end(); ++block )
      {
         WALBERLA_ASSERT( !( (*block)->getWorkload() < workload_t(0) ) );
         totalWeight += (*block)->getWorkload();
      }

      uint_t c( uint_t(0) ); // index of the next unassigned block on this level
      for( uint_t p = uint_t(0); p != numberOfProcesses; ++p )
      {
         // fair share of process p: remaining workload split evenly among the remaining processes
         const workload_t minWeight = totalWeight / workload_c( numberOfProcesses - p );
         workload_t weight( 0 );
         while( weight < minWeight && c < blocksOnLevel.size() )
         {
            blocksOnLevel[c]->assignTargetProcess(p);

            WALBERLA_ASSERT_LESS_EQUAL( p, usedProcesses );
            // The counter must only ever grow: a level with few blocks restarts at p == 0 and must
            // not shrink the number of processes that were already used by previous levels.
            if( usedProcesses < p + uint_t(1) )
               usedProcesses = p + uint_t(1);

            const workload_t addedWeight = blocksOnLevel[c]->getWorkload();
            weight += addedWeight;
            totalWeight -= addedWeight;
            ++c;
         }
      }

      // whatever is left over after all processes were served goes to the last process
      while( c < blocksOnLevel.size() )
      {
         blocksOnLevel[c]->assignTargetProcess( numberOfProcesses - uint_t(1) );

         WALBERLA_ASSERT_LESS_EQUAL( numberOfProcesses - uint_t(1), usedProcesses );
         usedProcesses = numberOfProcesses;

         ++c;
      }
   }

   return usedProcesses;
}
/**
 * Level-wise static load balancing along a space-filling curve that only counts blocks.
 *
 * The blocks are fetched in Hilbert order (if hilbert_ is set) or Morton order. The levels are
 * processed from the highest level index down to level 0. On every level the blocks are dealt out
 * in curve order; the process at which dealing starts is carried over from one level to the next.
 *
 * \param forest             forest whose blocks get a target process assigned
 * \param numberOfProcesses  number of processes the blocks are distributed to
 * \return the smaller of numberOfProcesses and the total number of blocks
 */
uint_t StaticLevelwiseCurveBalance::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ )
{
   // TODO: take per process memory limit into account?

   std::vector< SetupBlock * > curve;
   if( hilbert_ )
      forest.getHilbertOrder( curve );
   else
      forest.getMortonOrder( curve );

   // rank of the first process that is next in line to receive a block
   uint_t nextProcess = uint_t(0);

   for( uint_t remainingLevels = forest.getNumberOfLevels(); remainingLevels != uint_t(0); --remainingLevels )
   {
      const uint_t level = remainingLevels - uint_t(1);

      // all blocks of this level, still in curve order
      std::vector< SetupBlock * > levelBlocks;
      for( SetupBlock * const candidate : curve )
      {
         if( candidate->getLevel() == level )
            levelBlocks.push_back( candidate );
      }

      const uint_t levelSize = levelBlocks.size();
      const uint_t openSlots = numberOfProcesses - nextProcess;

      if( levelSize > openSlots )
      {
         // More blocks than processes in [nextProcess, numberOfProcesses): each of those processes
         // gets one block, the surplus is spread evenly over all processes starting at rank 0.
         const uint_t surplus  = levelSize - openSlots;
         const uint_t perRank  = surplus / numberOfProcesses;
         const uint_t leftover = surplus % numberOfProcesses;

         uint_t first = uint_t(0);
         for( uint_t p = 0; p != numberOfProcesses; ++p )
         {
            uint_t share = perRank;
            if( p < leftover )
               ++share;
            if( p >= nextProcess )
               ++share;

            const uint_t last = first + share;
            WALBERLA_ASSERT_LESS_EQUAL( last, levelBlocks.size() );

            for( ; first < last; ++first )
               levelBlocks[first]->assignTargetProcess( p );
         }

         nextProcess = leftover;
      }
      else
      {
         // Few enough blocks: one block per process, continuing where the previous level stopped.
         for( SetupBlock * const block : levelBlocks )
         {
            block->assignTargetProcess( nextProcess );
            ++nextProcess;
         }

         WALBERLA_ASSERT_LESS_EQUAL( nextProcess, numberOfProcesses );

         // wrap around once the last process has been served
         if( nextProcess == numberOfProcesses )
            nextProcess = uint_t(0);
      }
   }

   return std::min( numberOfProcesses, curve.size() );
}
static void test() { for( uint_t i = 0; i < 5; ++i ) { SetupBlockForest forest; forest.addRefinementSelectionFunction( refinementSelectionFunctionAll ); real_t xmin = math::realRandom( real_c(-100), real_c(100) ); real_t xmax = math::realRandom( xmin + real_c(10), real_c(120) ); real_t ymin = math::realRandom( real_c(-100), real_c(100) ); real_t ymax = math::realRandom( ymin + real_c(10), real_c(120) ); real_t zmin = math::realRandom( real_c(-100), real_c(100) ); real_t zmax = math::realRandom( zmin + real_c(10), real_c(120) ); AABB domain( xmin, ymin, zmin, xmax, ymax, zmax ); forest.init( domain, math::intRandom( uint_t(5), uint_t(20) ), math::intRandom( uint_t(5), uint_t(20) ), math::intRandom( uint_t(5), uint_t(20) ), math::boolRandom(), math::boolRandom(), math::boolRandom() ); checkNeighborhoodConsistency( forest ); checkCollectorConsistency( forest ); } for( uint_t i = 0; i < 5; ++i ) { SetupBlockForest forest; forest.addRefinementSelectionFunction( refinementSelectionFunctionRandom ); real_t xmin = math::realRandom( real_c(-100), real_c(100) ); real_t xmax = math::realRandom( xmin + real_c(10), real_c(120) ); real_t ymin = math::realRandom( real_c(-100), real_c(100) ); real_t ymax = math::realRandom( ymin + real_c(10), real_c(120) ); real_t zmin = math::realRandom( real_c(-100), real_c(100) ); real_t zmax = math::realRandom( zmin + real_c(10), real_c(120) ); AABB domain( xmin, ymin, zmin, xmax, ymax, zmax ); forest.init( domain, math::intRandom( uint_t(5), uint_t(20) ), math::intRandom( uint_t(5), uint_t(20) ), math::intRandom( uint_t(5), uint_t(20) ), math::boolRandom(), math::boolRandom(), math::boolRandom() ); checkNeighborhoodConsistency( forest ); checkCollectorConsistency( forest ); } }
void test(const shared_ptr< DistanceOctree< MeshType > > & distanceOctree, const MeshType & mesh, const AABB & domainAABB, Vector3<uint_t> numBlocks) { Vector3<real_t> blockSize(domainAABB.xSize() / real_c(numBlocks[0]), domainAABB.ySize() / real_c(numBlocks[1]), domainAABB.zSize() / real_c(numBlocks[2])); real_t maxError = blockSize.min() / real_t(10); SetupBlockForest setupBlockforest; setupBlockforest.addRootBlockExclusionFunction(F(distanceOctree, maxError)); setupBlockforest.addWorkloadMemorySUIDAssignmentFunction(blockforest::uniformWorkloadAndMemoryAssignment); setupBlockforest.init(domainAABB, numBlocks[0], numBlocks[1], numBlocks[2], false, false, false); WALBERLA_LOG_DEVEL(setupBlockforest.toString()); std::vector< Vector3<real_t> > vertexPositions; vertexPositions.reserve(mesh.n_vertices()); for (auto vIt = mesh.vertices_begin(); vIt != mesh.vertices_end(); ++vIt) { vertexPositions.push_back(toWalberla(mesh.point(*vIt))); } std::vector< const blockforest::SetupBlock* > setupBlocks; setupBlockforest.getBlocks(setupBlocks); // Check wether all vertices are located in allocated blocks std::vector< Vector3<real_t> > uncoveredVertices(vertexPositions); for (auto bIt = setupBlocks.begin(); bIt != setupBlocks.end(); ++bIt) { const AABB & aabb = (*bIt)->getAABB(); uncoveredVertices.erase(std::remove_if(uncoveredVertices.begin(), uncoveredVertices.end(), PointInAABB(aabb)), uncoveredVertices.end()); } WALBERLA_CHECK(uncoveredVertices.empty(), "Not all vertices of the mesh are located in allocated blocks!"); //setupBlockforest.assignAllBlocksToRootProcess(); //setupBlockforest.writeVTKOutput( "setupblockforest" ); }
static void checkNeighborhoodConsistency( const SetupBlockForest& forest ) { std::vector< const SetupBlock* > blocks; forest.getBlocks( blocks ); const int blockssize = int_c( blocks.size() ); #ifdef _OPENMP #pragma omp parallel for schedule(static) #endif for( int i = 0; i < blockssize; ++i ) { const SetupBlock* const block = blocks[uint_c(i)]; std::vector< real_t > neighborhoodSectionBlockCenters; for( uint_t n = 0; n != 26; ++n ) { std::vector< bool > hit( block->getNeighborhoodSectionSize(n), false ); constructNeighborhoodSectionBlockCenters( n, block->getAABB(), neighborhoodSectionBlockCenters ); WALBERLA_CHECK_EQUAL( neighborhoodSectionBlockCenters.size() % 3, uint_c(0) ); for( uint_t p = 0; p != neighborhoodSectionBlockCenters.size(); p += 3 ) { real_t x = neighborhoodSectionBlockCenters[p]; real_t y = neighborhoodSectionBlockCenters[p+1]; real_t z = neighborhoodSectionBlockCenters[p+2]; // treat periodicity if( x < forest.getDomain().xMin() && forest.isXPeriodic() ) x = forest.getDomain().xMax() - forest.getDomain().xMin() + x; if( x >= forest.getDomain().xMax() && forest.isXPeriodic() ) x = forest.getDomain().xMin() - forest.getDomain().xMax() + x; if( y < forest.getDomain().yMin() && forest.isYPeriodic() ) y = forest.getDomain().yMax() - forest.getDomain().yMin() + y; if( y >= forest.getDomain().yMax() && forest.isYPeriodic() ) y = forest.getDomain().yMin() - forest.getDomain().yMax() + y; if( z < forest.getDomain().zMin() && forest.isZPeriodic() ) z = forest.getDomain().zMax() - forest.getDomain().zMin() + z; if( z >= forest.getDomain().zMax() && forest.isZPeriodic() ) z = forest.getDomain().zMin() - forest.getDomain().zMax() + z; bool noHit = true; for( uint_t c = 0; c != block->getNeighborhoodSectionSize(n) && noHit; ++c ) { if( block->getNeighbor(n,c)->getAABB().contains(x,y,z) ) { hit[c] = true; noHit = false; } } // either one neighbor must be hit OR the block is located at the border of the (non-periodic) simulation domain if( noHit ) 
WALBERLA_CHECK( forest.getBlock(x,y,z) == NULL ); } // every neighbor must be hit by at least one point for( uint_t c = 0; c != block->getNeighborhoodSectionSize(n); ++c ) WALBERLA_CHECK( hit[c] ); neighborhoodSectionBlockCenters.clear(); } } }
/**
 * Distributes the root block grid of the forest to a Cartesian grid of processes.
 *
 * The block grid is cut into numberOfXProcesses_ x numberOfYProcesses_ x numberOfZProcesses_
 * partitions. Along every axis the partition sizes differ by at most one block; the partitions with
 * the lowest indices are the larger ones. All blocks returned by forest.getBlocks() for a partition
 * are assigned to the same process. The process index is x + y * nx + z * nx * ny and is mapped
 * through processIdMap_ if such a map is set.
 *
 * Aborts if the product of the per-axis process counts differs from numberOfProcesses, or if a
 * per-axis process count is zero or larger than the number of blocks along that axis.
 *
 * \param forest             forest whose blocks get a target process assigned
 * \param numberOfProcesses  total number of processes
 * \return numberOfProcesses
 */
uint_t CartesianDistribution::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ )
{
   if( numberOfProcesses != ( numberOfXProcesses_ * numberOfYProcesses_ * numberOfZProcesses_ ) )
      WALBERLA_ABORT( "Load balancing failed: The total number of processes must be identical to the product "
                      "of the \'number of processes in x-, y-, and z-direction\'." );

   // The valid range is (0, size]: a count of zero would lead to a division by zero further down.
   if( numberOfXProcesses_ == uint_t(0) || numberOfXProcesses_ > forest.getXSize() )
      WALBERLA_ABORT( "Load balancing failed: \'Number of processes in x-direction\' must be in (0," << forest.getXSize() << "]. "
                      "You specified \'" << numberOfXProcesses_ << "\'." );

   if( numberOfYProcesses_ == uint_t(0) || numberOfYProcesses_ > forest.getYSize() )
      WALBERLA_ABORT( "Load balancing failed: \'Number of processes in y-direction\' must be in (0," << forest.getYSize() << "]. "
                      "You specified \'" << numberOfYProcesses_ << "\'." );

   if( numberOfZProcesses_ == uint_t(0) || numberOfZProcesses_ > forest.getZSize() )
      WALBERLA_ABORT( "Load balancing failed: \'Number of processes in z-direction\' must be in (0," << forest.getZSize() << "]. "
                      "You specified \'" << numberOfZProcesses_ << "\'." );

   if( processIdMap_ != nullptr )
      WALBERLA_CHECK_EQUAL( processIdMap_->size(), numberOfProcesses );

   uint_t partitions[3];
   partitions[0] = numberOfXProcesses_;
   partitions[1] = numberOfYProcesses_;
   partitions[2] = numberOfZProcesses_;

   // indices[i][j] is the first block index of partition j along axis i,
   // indices[i][ partitions[i] ] equals the number of blocks along that axis.
   std::vector< uint_t > indices[3];

   for( uint_t i = 0; i != 3; ++i )
   {
      const uint_t div = forest.getSize(i) / partitions[i];
      const uint_t mod = forest.getSize(i) % partitions[i];

      // start with the partition sizes ( entry j+1 holds the size of partition j ) ...
      indices[i].resize( partitions[i] + 1, div );
      indices[i][0] = 0;
      for( uint_t j = 0; j != mod; ++j )
         ++indices[i][j+1]; // the first 'mod' partitions take one of the remaining blocks each

      // ... and turn them into offsets by means of a running sum
      for( uint_t j = 1; j != indices[i].size(); ++j )
         indices[i][j] += indices[i][j-1];
   }

   for( uint_t z = 0; z != partitions[2]; ++z ) {
      for( uint_t y = 0; y != partitions[1]; ++y ) {
         for( uint_t x = 0; x != partitions[0]; ++x )
         {
            // NOTE(review): the six index arguments presumably describe the half-open block range
            // [begin, end) of this partition - confirm against SetupBlockForest::getBlocks.
            std::vector< SetupBlock * > partitionBlocks;
            forest.getBlocks( partitionBlocks, indices[0][x], indices[1][y], indices[2][z],
                                               indices[0][x+1], indices[1][y+1], indices[2][z+1] );

            // linear index of the partition, identical for all of its blocks
            const uint_t index = z * partitions[0] * partitions[1] + y * partitions[0] + x;

            for( auto block = partitionBlocks.begin(); block != partitionBlocks.end(); ++block )
               (*block)->assignTargetProcess( ( processIdMap_ != nullptr ) ? (*processIdMap_)[ index ] : index );
         }
      }
   }

   return numberOfProcesses;
}