__global__ void kernelPositionsParticles(ParticlesBox<FRAME, simDim> pb, SglParticle<FloatPos>* gParticle, Mapping mapper) { __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ typedef typename Mapping::SuperCellSize SuperCellSize; const DataSpace<simDim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); if (linearThreadIdx == 0) { frame = &(pb.getLastFrame(superCellIdx, isValid)); } __syncthreads(); if (!isValid) return; //end kernel if we have no frames bool isParticle = (*frame)[linearThreadIdx][multiMask_]; while (isValid) { if (isParticle) { PMACC_AUTO(particle,(*frame)[linearThreadIdx]); gParticle->position = particle[position_]; gParticle->momentum = particle[momentum_]; gParticle->weighting = particle[weighting_]; gParticle->mass = frame->getMass(gParticle->weighting); gParticle->charge = frame->getCharge(gParticle->weighting); gParticle->gamma = Gamma<>()(gParticle->momentum, gParticle->mass); // storage number in the actual frame const lcellId_t frameCellNr = particle[localCellIdx_]; // offset in the actual superCell = cell offset in the supercell const DataSpace<simDim> frameCellOffset(DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (frameCellNr)); gParticle->globalCellOffset = (superCellIdx - mapper.getGuardingSuperCells()) * MappingDesc::SuperCellSize::getDataSpace() + frameCellOffset; } __syncthreads(); if (linearThreadIdx == 0) { frame = &(pb.getPreviousFrame(*frame, isValid)); } isParticle = true; __syncthreads(); } }
__global__ void kernelCountParticles(PBox pb, uint64_cu* gCounter, Filter filter, Mapping mapper) { typedef typename PBox::FrameType FRAME; const uint32_t Dim = Mapping::Dim; __shared__ FRAME *frame; __shared__ bool isValid; __shared__ int counter; __shared__ lcellId_t particlesInSuperCell; __syncthreads(); /*wait that all shared memory is initialised*/ typedef typename Mapping::SuperCellSize SuperCellSize; const DataSpace<Dim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<Dim>::template map<SuperCellSize > (threadIndex); const DataSpace<Dim> superCellIdx(mapper.getSuperCellIndex(DataSpace<Dim > (blockIdx))); if (linearThreadIdx == 0) { frame = &(pb.getLastFrame(superCellIdx, isValid)); particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame(); counter = 0; } __syncthreads(); if (!isValid) return; //end kernel if we have no frames filter.setSuperCellPosition((superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize()); while (isValid) { if (linearThreadIdx < particlesInSuperCell) { if (filter(*frame, linearThreadIdx)) atomicAdd(&counter, 1); } __syncthreads(); if (linearThreadIdx == 0) { frame = &(pb.getPreviousFrame(*frame, isValid)); particlesInSuperCell = math::CT::volume<SuperCellSize>::type::value; } __syncthreads(); } __syncthreads(); if (linearThreadIdx == 0) { atomicAdd(gCounter, (uint64_cu) counter); } }
__global__ void CountMakroParticle(ParBox parBox, CounterBox counterBox, Mapping mapper) { typedef MappingDesc::SuperCellSize SuperCellSize; typedef typename ParBox::FrameType FrameType; const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); /* counterBox has no guarding supercells*/ const DataSpace<simDim> counterCell = block - mapper.getGuardingSuperCells(); const DataSpace<simDim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); __shared__ uint64_cu counterValue; __shared__ FrameType *frame; __shared__ bool isValid; if (linearThreadIdx == 0) { counterValue = 0; frame = &(parBox.getLastFrame(block, isValid)); if (!isValid) { counterBox(counterCell) = counterValue; } } __syncthreads(); if (!isValid) return; //end kernel if we have no frames bool isParticle = (*frame)[linearThreadIdx][multiMask_]; while (isValid) { if (isParticle) { atomicAdd(&counterValue, static_cast<uint64_cu> (1LU)); } __syncthreads(); if (linearThreadIdx == 0) { frame = &(parBox.getPreviousFrame(*frame, isValid)); } isParticle = true; __syncthreads(); } if (linearThreadIdx == 0) counterBox(counterCell) = counterValue; }
__global__ void kernelCellwiseOperation( FieldBox field, T_OpFunctor opFunctor, T_ValFunctor valFunctor, const DataSpace<simDim> totalCellOffset, const uint32_t currentStep, Mapping mapper ) { const DataSpace<simDim> block( mapper.getSuperCellIndex( DataSpace<simDim>( blockIdx ) ) ); const DataSpace<simDim> blockCell = block * MappingDesc::SuperCellSize::toRT(); const DataSpace<simDim> threadIndex( threadIdx ); opFunctor( field( blockCell + threadIndex ), valFunctor( blockCell + threadIndex + totalCellOffset, currentStep ) ); }
DINLINE void operator()(BoxReadOnly buffRead, BoxWriteOnly buffWrite, uint32_t rule, Mapping mapper) const { typedef typename BoxReadOnly::ValueType Type; typedef SuperCellDescription< typename Mapping::SuperCellSize, math::CT::Int< 1, 1 >, math::CT::Int< 1, 1 > > BlockArea; auto cache = CachedBox::create < 0, Type > (BlockArea()); const Space block(mapper.getSuperCellIndex(Space(blockIdx))); const Space blockCell = block * Mapping::SuperCellSize::toRT(); const Space threadIndex(threadIdx); auto buffRead_shifted = buffRead.shift(blockCell); ThreadCollective<BlockArea> collective(threadIndex); nvidia::functors::Assign assign; collective( assign, cache, buffRead_shifted ); __syncthreads(); Type neighbors = 0; for (uint32_t i = 1; i < 9; ++i) { Space offset(Mask::getRelativeDirections<DIM2 > (i)); neighbors += cache(threadIndex + offset); } Type isLife = cache(threadIndex); isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) + (bool)(((isLife)*(1 << (neighbors))) & rule); buffWrite(blockCell + threadIndex) = isLife; }
__global__ void kernelLineSliceFields(E_DataBox fieldE, B_DataBox fieldB, float3_X* sliceDataField, DataSpace<simDim> globalCellIdOffset, DataSpace<simDim> globalNrOfCells, Mapping mapper) { typedef typename Mapping::SuperCellSize SuperCellSize; const DataSpace<simDim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); __syncthreads(); // GPU-local cell id with lower GPU-local guarding const DataSpace<simDim> localCell(superCellIdx * SuperCellSize::toRT() + threadIndex); const float3_X b = fieldB(localCell); const float3_X e = fieldE(localCell); // GPU-local cell id without lower GPU-local guarding const DataSpace<simDim> localCellWG( localCell - SuperCellSize::toRT() * mapper.getGuardingSuperCells()); // global cell id const DataSpace<simDim> globalCell = localCellWG + globalCellIdOffset; // slice out one cell along an axis if ((globalCell.x() == globalNrOfCells.x() / 2)) #if(SIMDIM==DIM3) if(globalCell.z() == globalNrOfCells.z() / 2) #endif sliceDataField[localCellWG.y()] = e; __syncthreads(); }
DINLINE void operator()(BoxWriteOnly buffWrite, uint32_t seed, float fraction, Mapping mapper) const { /* get position in grid in units of SuperCells from blockID */ const Space block(mapper.getSuperCellIndex(Space(blockIdx))); /* convert position in unit of cells */ const Space blockCell = block * Mapping::SuperCellSize::toRT(); /* convert CUDA dim3 to DataSpace<DIM3> */ const Space threadIndex(threadIdx); const uint32_t cellIdx = DataSpaceOperations<DIM2>::map( mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(), blockCell + threadIndex); /* get uniform random number from seed */ auto rng = nvidia::rng::create( nvidia::rng::methods::Xor(seed, cellIdx), nvidia::rng::distributions::Uniform_float()); /* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */ buffWrite(blockCell + threadIndex) = (rng() <= fraction); }
__global__ void kernelParticleDensity(ParBox pb, DataBox<PitchedBox<Type_, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) { isValid = true; isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<DIM3> particleCellId(DataSpaceOperations<DIM3>::template map<Block > (cellIdx)); uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { image(imageCell) = (Type_) counter(localCell); } }
__global__ void kernelPaintParticles3D(ParBox pb, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx #if(SIMDIM==DIM3) uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) #endif { atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/ isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx)); #if(SIMDIM==DIM3) uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) #endif { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again. * BUT: since we are interested in a simple value between 0 and 1, * we stay with this number (normalized to the order of macro * particles) and devide by the number of typical macro particles * per cell */ float_X value = counter(localCell) / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE; if (value > 1.0) value = 1.0; //image(imageCell).x() = value; visPreview::preParticleDensCol::addRGB(image(imageCell), value, visPreview::preParticleDens_opacity); // cut to [0, 1] if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0); if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0); if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0); if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0); if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0); if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0); } }
__global__ void kernelPaintFields( EBox fieldE, BBox fieldB, JBox fieldJ, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, const int slice, const uint32_t globalOffset, const uint32_t sliceDim, Mapping mapper) { typedef typename MappingDesc::SuperCellSize Block; const DataSpace<simDim> threadId(threadIdx); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> cell(block * Block::getDataSpace() + threadId); const DataSpace<simDim> blockOffset((block - mapper.getGuardingSuperCells()) * Block::getDataSpace()); const DataSpace<simDim> realCell(cell - MappingDesc::SuperCellSize::getDataSpace() * mapper.getGuardingSuperCells()); //delete guard from cell idx const DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); const DataSpace<simDim> realCell2(blockOffset + threadId); //delete guard from cell idx #if (SIMDIM==DIM3) uint32_t globalCell = realCell2[sliceDim] + globalOffset; if (globalCell != slice) return; #endif // set fields of this cell to vars typename BBox::ValueType field_b = fieldB(cell); typename EBox::ValueType field_e = fieldE(cell); typename JBox::ValueType field_j = fieldJ(cell); #if(SIMDIM==DIM3) field_j = float3_X( field_j.x() * CELL_HEIGHT * CELL_DEPTH, field_j.y() * CELL_WIDTH * CELL_DEPTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #elif (SIMDIM==DIM2) field_j = float3_X( field_j.x() * CELL_HEIGHT, field_j.y() * CELL_WIDTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #endif // reset picture to black // color range for each RGB channel: [0.0, 1.0] float3_X pic = float3_X(0., 0., 0.); // typical values of the fields to normalize them to [0,1] // pic.x() = visPreview::preChannel1(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()); pic.y() = visPreview::preChannel2(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()); pic.z() = visPreview::preChannel3(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()); //visPreview::preChannel1Col::addRGB(pic, // visPreview::preChannel1(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()), // visPreview::preChannel1_opacity); //visPreview::preChannel2Col::addRGB(pic, // visPreview::preChannel2(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()), // visPreview::preChannel2_opacity); //visPreview::preChannel3Col::addRGB(pic, // visPreview::preChannel3(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()), // visPreview::preChannel3_opacity); // draw to (perhaps smaller) image cell image(imageCell) = pic; }
DINLINE void operator()(ParBoxIons ionBox, ParBoxElectrons electronBox, FrameIonizer frameIonizer, Mapping mapper) const { /* "particle box" : container/iterator where the particles live in * and where one can get the frame in a super cell from */ typedef typename ParBoxElectrons::FrameType ELECTRONFRAME; typedef typename ParBoxIons::FrameType IONFRAME; typedef typename ParBoxIons::FramePtr IonFramePtr; typedef typename ParBoxElectrons::FramePtr ElectronFramePtr; /* specify field to particle interpolation scheme */ typedef typename PMacc::traits::Resolve< typename GetFlagType<IONFRAME,interpolation<> >::type >::type InterpolationScheme; /* margins around the supercell for the interpolation of the field on the cells */ typedef typename GetMargin<InterpolationScheme>::LowerMargin LowerMargin; typedef typename GetMargin<InterpolationScheme>::UpperMargin UpperMargin; /* relevant area of a block */ typedef SuperCellDescription< typename MappingDesc::SuperCellSize, LowerMargin, UpperMargin > BlockDescription_; /* for not mixing operations::assign up with the nvidia functor assign */ namespace partOp = PMacc::particles::operations; /* definitions for domain variables, like indices of blocks and threads */ typedef typename BlockDescription_::SuperCellSize SuperCellSize; /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */ const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); /* multi-dim vector from origin of the block to a cell in units of cells */ const DataSpace<simDim > threadIndex(threadIdx); /* conversion from a multi-dim cell coordinate to a linear coordinate of the cell in its super cell */ const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); /* multi-dim offset from the origin of the local domain on GPU * to the origin of the block of the in unit of cells */ const DataSpace<simDim> blockCell = block * SuperCellSize::toRT(); /* subtract guarding cells to only have the simulation volume */ const DataSpace<simDim> localCellIndex = (block * SuperCellSize::toRT() + threadIndex) - mapper.getGuardingSuperCells() * SuperCellSize::toRT(); /* typedef for the functor that writes new macro electrons into electron frames during runtime */ typedef typename particles::ionization::WriteElectronIntoFrame WriteElectronIntoFrame; PMACC_SMEM( ionFrame, IonFramePtr ); PMACC_SMEM( electronFrame,ElectronFramePtr ); PMACC_SMEM( maxParticlesInFrame, lcellId_t ); /* find last frame in super cell * define maxParticlesInFrame as the maximum frame size */ if (linearThreadIdx == 0) { ionFrame = ionBox.getLastFrame(block); maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value; } __syncthreads(); if (!ionFrame.isValid()) return; //end kernel if we have no frames /* caching of E- and B- fields and initialization of random generator if needed */ frameIonizer.init(blockCell, linearThreadIdx, localCellIndex); /* Declare counter in shared memory that will later tell the current fill level or * occupation of the newly created target electron frames. */ PMACC_SMEM( newFrameFillLvl, int ); /* Declare local variable oldFrameFillLvl for each thread */ int oldFrameFillLvl; /* Initialize local (register) counter for each thread * - describes how many new macro electrons should be created */ unsigned int newMacroElectrons = 0; /* Declare local electron ID * - describes at which position in the new frame the new electron is to be created */ int electronId; /* Master initializes the frame fill level with 0 */ if (linearThreadIdx == 0) { newFrameFillLvl = 0; electronFrame = nullptr; } __syncthreads(); /* move over source species frames and call frameIonizer * frames are worked on in backwards order to avoid asking if there is another frame * --> performance * Because all frames are completely filled except the last and apart from that last frame * one wants to make sure that all threads are working and every frame is worked on. */ while (ionFrame.isValid()) { /* casting uint8_t multiMask to boolean */ const bool isParticle = ionFrame[linearThreadIdx][multiMask_]; __syncthreads(); /* < IONIZATION and change of charge states > * if the threads contain particles, the frameIonizer can ionize them * if they are non-particles their inner ionization counter remains at 0 */ if (isParticle) /* ionization based on ionization model - this actually increases charge states*/ frameIonizer(*ionFrame, linearThreadIdx, newMacroElectrons); __syncthreads(); /* always true while-loop over all particles inside source frame until each thread breaks out individually * * **Attention**: Speaking of 1st and 2nd frame only may seem odd. * The question might arise what happens if more electrons are created than would fit into two frames. * Well, multi-ionization during a time step is accounted for. The number of new electrons is * determined inside the outer loop over the valid frames while in the inner loop each thread can create only ONE * new macro electron. But the loop repeats until each thread has created all the electrons needed in the time step. */ while (true) { /* < INIT > * - electronId is initialized as -1 (meaning: invalid) * - (local) oldFrameFillLvl set equal to (shared) newFrameFillLvl for each thread * --> each thread remembers the old "counter" * - then sync */ electronId = -1; oldFrameFillLvl = newFrameFillLvl; __syncthreads(); /* < CHECK & ADD > * - if a thread wants to create electrons in each cycle it can do that only once * and before that it atomically adds to the shared counter and uses the current * value as electronId in the new frame * - then sync */ if (newMacroElectrons > 0) electronId = nvidia::atomicAllInc(&newFrameFillLvl); __syncthreads(); /* < EXIT? > * - if the counter hasn't changed all threads break out of the loop */ if (oldFrameFillLvl == newFrameFillLvl) break; __syncthreads(); /* < FIRST NEW FRAME > * - if there is no frame, yet, the master will create a new target electron frame * and attach it to the back of the frame list * - sync all threads again for them to know which frame to use */ if (linearThreadIdx == 0) { if (!electronFrame.isValid()) { electronFrame = electronBox.getEmptyFrame(); electronBox.setAsLastFrame(electronFrame, block); } } __syncthreads(); /* < CREATE 1 > * - all electrons fitting into the current frame are created there * - internal ionization counter is decremented by 1 * - sync */ if ((0 <= electronId) && (electronId < maxParticlesInFrame)) { /* each thread makes the attributes of its ion accessible */ auto parentIon = (ionFrame[linearThreadIdx]); /* each thread initializes an electron if one should be created */ auto targetElectronFull = (electronFrame[electronId]); /* create an electron in the new electron frame: * - see particles/ionization/ionizationMethods.hpp */ WriteElectronIntoFrame writeElectron; writeElectron(parentIon,targetElectronFull); newMacroElectrons -= 1; } __syncthreads(); /* < SECOND NEW FRAME > * - if the shared counter is larger than the frame size a new electron frame is reserved * and attached to the back of the frame list * - then the shared counter is set back by one frame size * - sync so that every thread knows about the new frame */ if (linearThreadIdx == 0) { if (newFrameFillLvl >= maxParticlesInFrame) { electronFrame = electronBox.getEmptyFrame(); electronBox.setAsLastFrame(electronFrame, block); newFrameFillLvl -= maxParticlesInFrame; } } __syncthreads(); /* < CREATE 2 > * - if the EID is larger than the frame size * - the EID is set back by one frame size * - the thread writes an electron to the new frame * - the internal counter is decremented by 1 */ if (electronId >= maxParticlesInFrame) { electronId -= maxParticlesInFrame; /* each thread makes the attributes of its ion accessible */ auto parentIon = ((*ionFrame)[linearThreadIdx]); /* each thread initializes an electron if one should be produced */ auto targetElectronFull = (electronFrame[electronId]); /* create an electron in the new electron frame: * - see particles/ionization/ionizationMethods.hpp */ WriteElectronIntoFrame writeElectron; writeElectron(parentIon,targetElectronFull); newMacroElectrons -= 1; } __syncthreads(); } __syncthreads(); if (linearThreadIdx == 0) { ionFrame = ionBox.getPreviousFrame(ionFrame); maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value; } __syncthreads(); } } // void kernelIonizeParticles