Beispiel #1
0
   /**
    * Allocates device memory with cudaMalloc and registers it in freePointers_.
    *
    * For a non-zero offset the returned pointer p satisfies (p + offset) % alignment == 0.
    * For offset == 0 the pointer delivered by cudaMalloc is returned unchanged; its alignment
    * is not verified on that path.
    *
    * \param size       number of usable bytes requested
    * \param alignment  alignment in bytes; has to be a power of two and greater than zero
    * \param offset     number of bytes after the returned pointer at which the aligned address lies;
    *                   has to be smaller than alignment
    *
    * \return pointer to the usable memory. freePointers_ maps this pointer to the pointer that
    *         cudaMalloc returned.
    */
   void *allocate_aligned_with_offset( uint_t size, uint_t alignment, uint_t offset )
   {
      // An alignment of zero is meaningless for this function - use a plain allocation instead.
      WALBERLA_ASSERT_GREATER( alignment, 0 );
      // Given alignment > 0: a power of two has no bit in common with its predecessor.
      WALBERLA_ASSERT( ( alignment & ( alignment - 1 ) ) == 0 );

      WALBERLA_ASSERT_LESS( offset, alignment );

      if( offset != 0 )
      {
         // Over-allocate by 'alignment' bytes so that the usable region can be shifted forward.
         void * allocated = nullptr;
         WALBERLA_CUDA_CHECK( cudaMalloc( &allocated, size + alignment ) );
         WALBERLA_CHECK_EQUAL( reinterpret_cast< size_t >( allocated ) % alignment, 0 , "CUDA malloc did not return memory with requested alignment");

         // Shift the start so that 'usable + offset' coincides with 'allocated + alignment'.
         void * usable = static_cast< void * >( static_cast< char * >( allocated ) + alignment - offset );
         freePointers_[usable] = allocated;

         WALBERLA_ASSERT_EQUAL( ( reinterpret_cast< size_t >( usable ) + offset ) % alignment, 0 );
         return usable;
      }

      // No offset requested: hand out the allocation as it is; usable and allocated pointer coincide.
      void * result = nullptr;
      WALBERLA_CUDA_CHECK( cudaMalloc( &result, size ) );
      freePointers_[result] = result;
      return result;
   }
Beispiel #2
0
/**
 * Assigns a target process to every block of the forest, balancing the block workloads
 * separately on each level.
 *
 * The blocks are fetched in Hilbert or Morton order (depending on hilbert_). For each level, the
 * blocks of that level are handed out in curve order: process p receives consecutive blocks until
 * its accumulated workload reaches the still unassigned workload of the level divided by the
 * number of processes that have not been served yet. Blocks that remain after the last process
 * was served are assigned to process 'numberOfProcesses - 1'.
 *
 * \param forest             block forest whose blocks get their target process assigned
 * \param numberOfProcesses  number of processes available for the distribution; must be greater than zero
 *
 * The third (unnamed) parameter is the per process memory limit. It is currently ignored.
 *
 * \return the number of processes in use, i.e. one plus the highest process rank that received
 *         a block on any level
 */
uint_t StaticLevelwiseCurveBalanceWeighted::operator()( SetupBlockForest & forest, const uint_t numberOfProcesses, const memory_t /*perProcessMemoryLimit*/ )
{
   // TODO: take per process memory limit into account?

   // 'numberOfProcesses - 1' is used as a process rank below and would wrap around for zero processes.
   WALBERLA_ASSERT( numberOfProcesses > uint_t(0) );

   std::vector< SetupBlock * > blocks;
   if( hilbert_ )
      forest.getHilbertOrder( blocks );
   else
      forest.getMortonOrder( blocks );

   uint_t usedProcesses( uint_t(0) );

   for( uint_t level = uint_t(0); level < forest.getNumberOfLevels(); ++level )
   {
      // Collect the blocks of the current level; their relative curve order is preserved.
      std::vector< SetupBlock * > blocksOnLevel;

      for( auto block = blocks.begin(); block != blocks.end(); ++block )
         if( (*block)->getLevel() == level )
            blocksOnLevel.push_back( *block );

      // Workload of this level that has not been assigned to a process yet.
      workload_t totalWeight( 0 );
      for( auto block = blocksOnLevel.begin(); block != blocksOnLevel.end(); ++block )
      {
         WALBERLA_ASSERT( !( (*block)->getWorkload() < workload_t(0) ) );
         totalWeight += (*block)->getWorkload();
      }

      uint_t c( uint_t(0) ); // index of the next unassigned block on this level
      for( uint_t p = uint_t(0); p != numberOfProcesses; ++p )
      {
         // Fair share of process p: remaining workload divided by the number of remaining processes.
         const workload_t minWeight = totalWeight / workload_c( numberOfProcesses - p );
         workload_t weight( 0 );
         while( weight < minWeight && c < blocksOnLevel.size() )
         {
            blocksOnLevel[c]->assignTargetProcess(p);

            WALBERLA_ASSERT_LESS_EQUAL( p, usedProcesses );
            // Only ever raise the count: every level starts again at process 0, and a previous
            // level may already have reached a higher rank than this one does.
            if( usedProcesses < p + uint_t(1) )
               usedProcesses = p + uint_t(1);

            const workload_t addedWeight = blocksOnLevel[c]->getWorkload();
            weight += addedWeight;
            totalWeight -= addedWeight;
            ++c;
         }
      }

      // Blocks that are still unassigned after all processes were served go to the last process.
      while( c < blocksOnLevel.size() )
      {
         blocksOnLevel[c]->assignTargetProcess( numberOfProcesses - uint_t(1) );

         WALBERLA_ASSERT_LESS_EQUAL( numberOfProcesses - uint_t(1), usedProcesses );
         usedProcesses = numberOfProcesses;

         ++c;
      }
   }

   return usedProcesses;
}
Beispiel #3
0
   // MPI start-up: initializes MPI if necessary and caches communicator size and rank.
   // NOTE(review): WALBERLA_MPI_SECTION presumably restricts this block to builds with MPI support - confirm.
   WALBERLA_MPI_SECTION()
   {
      // This code must not run a second time on an already initialized object.
      WALBERLA_ASSERT( !isMPIInitialized_ );

      // First check whether MPI has already been initialized by someone else,
      // e.g. when using Python, MPI could have been initialized by
      // a different MPI module like mpi4py
      int mpiAlreadyInitialized=0;
      MPI_Initialized( &mpiAlreadyInitialized );
      if ( ! mpiAlreadyInitialized ) {
         MPI_Init( argc, argv );
         // Only set when MPI_Init was called here.
         // NOTE(review): presumably makes the destructor call MPI_Finalize - confirm.
         finalizeOnDestruction_ = true;
      }

      isMPIInitialized_ = true;
      // Cache the size of MPI_COMM_WORLD and the rank of this process within it.
      MPI_Comm_size( MPI_COMM_WORLD, &numProcesses_ );
      MPI_Comm_rank( MPI_COMM_WORLD, &worldRank_ );

      // On request, replace the terminate handler with customTerminateHandler.
      if( abortOnException )
         std::set_terminate( customTerminateHandler );
   }
Beispiel #4
0
/// Returns the smallest cell interval that contains all cells of this set.
/// The set must not be empty (only checked by an assertion).
/// Complexity is O(N), where N == this->size()
CellInterval CellSet::boundingBox() const {

   WALBERLA_ASSERT( !empty() );

   Set<Cell>::const_iterator cellIt = Set<Cell>::begin();
   Set<Cell>::const_iterator endIt  = Set<Cell>::end();

   // Start with the interval that consists of the first cell only ...
   CellInterval interval( cellIt->x(), cellIt->y(), cellIt->z(), cellIt->x(), cellIt->y(), cellIt->z() );

   // ... and enlarge it, axis by axis, with every remaining cell.
   for( ++cellIt; cellIt != endIt; ++cellIt ) {

      const auto & cell = *cellIt;

      if( cell.x() < interval.xMin() ) interval.xMin() = cell.x();
      if( cell.x() > interval.xMax() ) interval.xMax() = cell.x();

      if( cell.y() < interval.yMin() ) interval.yMin() = cell.y();
      if( cell.y() > interval.yMax() ) interval.yMax() = cell.y();

      if( cell.z() < interval.zMin() ) interval.zMin() = cell.z();
      if( cell.z() > interval.zMax() ) interval.zMax() = cell.z();
   }

   return interval;
}
Beispiel #5
0
void Block::resetNeighborhood( const PhantomBlock & phantom )
{
   std::map< BlockID, uint_t > neighborhoodMapping;

   neighborhood_.clear();
   for( uint_t i = 0; i != phantom.getNeighborhoodSize(); ++i )
   {
      neighborhood_.emplace_back( forest_, phantom.getNeighborId(i), phantom.getNeighborProcess(i), phantom.getNeighborState(i) );
      neighborhoodMapping[ phantom.getNeighborId(i) ] = i;
   }

   for( uint_t i = 0; i != 26; ++i )
   {
      neighborhoodSection_[i].clear();
      for( uint_t j = 0; j != phantom.getNeighborhoodSectionSize(i); ++j )
      {
         WALBERLA_ASSERT( neighborhoodMapping.find( phantom.getNeighborId(i,j) ) != neighborhoodMapping.end() );

         neighborhoodSection_[i].push_back( &(neighborhood_[ neighborhoodMapping[phantom.getNeighborId(i,j)] ]) );
      }
   }
}
Beispiel #6
0
/*******************************************************************************************************************//**
 * \brief   Gets all prime factors of a number.
 *
 * Uses trial division algorithm.
 * See http://en.wikipedia.org/w/index.php?title=Trial_division&oldid=518625973.
 *
 * \param   n  The number to be factorized.
 *
 * \pre     n > 0
 *
 * \return  The prime factors in ascending order.
 **********************************************************************************************************************/
std::vector<uint_t> getPrimeFactors( const uint_t n )
{
   WALBERLA_ASSERT( n != 0 );

   auto primes = getPrimes(n);
   std::vector<uint_t> primeFactors;

   uint_t n_rest = n;
   for(auto primeIt = primes.begin(); primeIt != primes.end(); ++primeIt)
   {
      if( *primeIt * *primeIt > n )
         break;
      while( n_rest % *primeIt == 0)
      {
         n_rest /= *primeIt;
         primeFactors.push_back(*primeIt);
      }
   }

   if( n_rest != 1 )
      primeFactors.push_back(n_rest);

   return primeFactors;
}