/** Device-side initialization.
 *
 * Populates the shared-memory caches for the density and energy density
 * fields and prepares per-cell prerequisites for ionization (e.g. the
 * random number generator).
 *
 * Must be called inline on the device BEFORE threads diverge during loop
 * execution: it ends with a `__syncthreads()` barrier, which is required
 * after filling the shared-memory field boxes and must be reached by all
 * threads of the block.
 *
 * @param blockCell Offset of the cell from the origin of the local domain
 *                  *including guarding supercells*, in units of cells
 * @param linearThreadIdx Linearized thread ID inside the block
 * @param localCellOffset Offset of the cell from the origin of the local
 *                        domain, i.e. from the @see BORDER,
 *                        *without guarding supercells*
 */
DINLINE void init(const DataSpace<simDim>& blockCell, const int& linearThreadIdx, const DataSpace<simDim>& localCellOffset)
{
    /* allocate the shared-memory caches for density and "temperature" */
    cachedRho = CachedBox::create<0, ValueType_Rho>(BlockArea());
    cachedEne = CachedBox::create<1, ValueType_Ene>(BlockArea());

    /* collective copy helper (one instance per thread) plus the
     * element-wise assignment functor it applies */
    ThreadCollective<BlockArea> blockCopy(linearThreadIdx);
    nvidia::functors::Assign assignOp;

    /* global -> shared: density field region of this block */
    auto shiftedRhoBox = rhoBox.shift(blockCell);
    blockCopy(assignOp, cachedRho, shiftedRhoBox);

    /* global -> shared: energy density field region of this block */
    auto shiftedEneBox = eneBox.shift(blockCell);
    blockCopy(assignOp, cachedEne, shiftedEneBox);

    /* barrier: every thread must finish writing the caches before
     * any thread reads from them */
    __syncthreads();

    /* seed the random number generator with the cell's position in the
     * local simulation domain */
    this->randomGen.init(localCellOffset);
}
/** Collectively cache the E and B fields for this block.
 *
 * Copies the block-local regions of the electric and magnetic field from
 * global into shared memory. All workers of the block must call this
 * before any of them diverges, because the trailing `__syncthreads()`
 * barrier finalizes the shared-memory initialization.
 *
 * @param acc accelerator instance
 * @param blockCell offset of the block from the origin of the local
 *        domain, in cells — presumably including guard supercells,
 *        analogous to the other init paths; confirm against callers
 * @param workerCfg worker configuration of the executing block
 */
DINLINE void collectiveInit( const T_Acc & acc, const DataSpace<simDim>& blockCell, const T_WorkerCfg & workerCfg )
{
    /* allocate the shared-memory caches for the B (id 0) and E (id 1) field */
    cachedB = CachedBox::create<0, ValueType_B>(acc, BlockArea());
    cachedE = CachedBox::create<1, ValueType_E>(acc, BlockArea());

    /* element-wise assignment functor and the per-worker collective
     * copy helper that applies it over the block area */
    nvidia::functors::Assign copyFunctor;
    ThreadCollective<BlockArea, T_WorkerCfg::numWorkers> blockCopy(workerCfg.getWorkerIdx());

    /* global -> shared: magnetic field region of this block */
    auto shiftedBBox = bBox.shift(blockCell);
    blockCopy(acc, copyFunctor, cachedB, shiftedBBox);

    /* global -> shared: electric field region of this block */
    auto shiftedEBox = eBox.shift(blockCell);
    blockCopy(acc, copyFunctor, cachedE, shiftedEBox);

    /* barrier: all workers must finish writing before anyone reads the caches */
    __syncthreads();
}
#include<assert.h> #include<mpi.h> #include<civl-mpi.cvh> #include<stdio.h> $input int in; $input int _mpi_nprocs = 2; $assume(in > 0); MPI_Comm comm = MPI_COMM_WORLD; int rank; int gimmeOne(int x) $requires {$collective(comm) $mpi_isRecvBufEmpty(1-rank)} $requires {$collective(MPI_COMM_WORLD) $true} $requires {x > 0} $ensures {$collective(MPI_COMM_WORLD) $mpi_isRecvBufEmpty(1-rank)} $ensures {$collective(MPI_COMM_WORLD) $result == 1 + x} $ensures {x == in} { return 1 + x; } int main(int argc, char * argv[]) { int x; MPI_Init(&argc, &argv); MPI_Comm_rank(comm, &rank); if(rank == 1) MPI_Send(&rank, 1, MPI_INT, 0, 0, comm); x = gimmeOne(in); if(rank == 0)