__global__ void CountMakroParticle(ParBox parBox, CounterBox counterBox, Mapping mapper) { typedef MappingDesc::SuperCellSize SuperCellSize; typedef typename ParBox::FrameType FrameType; const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); /* counterBox has no guarding supercells*/ const DataSpace<simDim> counterCell = block - mapper.getGuardingSuperCells(); const DataSpace<simDim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); __shared__ uint64_cu counterValue; __shared__ FrameType *frame; __shared__ bool isValid; if (linearThreadIdx == 0) { counterValue = 0; frame = &(parBox.getLastFrame(block, isValid)); if (!isValid) { counterBox(counterCell) = counterValue; } } __syncthreads(); if (!isValid) return; //end kernel if we have no frames bool isParticle = (*frame)[linearThreadIdx][multiMask_]; while (isValid) { if (isParticle) { atomicAdd(&counterValue, static_cast<uint64_cu> (1LU)); } __syncthreads(); if (linearThreadIdx == 0) { frame = &(parBox.getPreviousFrame(*frame, isValid)); } isParticle = true; __syncthreads(); } if (linearThreadIdx == 0) counterBox(counterCell) = counterValue; }
__global__ void kernelAddOneParticle(ParBox pb, DataSpace<simDim> superCell, DataSpace<simDim> parLocalCell) { typedef typename ParBox::FrameType FRAME; FRAME *frame; int linearIdx = DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (parLocalCell); float_X parWeighting = NUM_EL_PER_PARTICLE; frame = &(pb.getEmptyFrame()); pb.setAsLastFrame(*frame, superCell); // many particle loop: for (unsigned i = 0; i < 1; ++i) { PMACC_AUTO(par,(*frame)[i]); floatD_X pos; for(int i=0; i<simDim; ++i) pos[i] = 0.5; const float_X GAMMA0_X = 1.0f / sqrtf(1.0f - float_X(BETA0_X * BETA0_X)); const float_X GAMMA0_Y = 1.0f / sqrtf(1.0f - float_X(BETA0_Y * BETA0_Y)); const float_X GAMMA0_Z = 1.0f / sqrtf(1.0f - float_X(BETA0_Z * BETA0_Z)); float3_X mom = float3_X( GAMMA0_X * getMass<FRAME>(parWeighting) * float_X(BETA0_X) * SPEED_OF_LIGHT, GAMMA0_Y * getMass<FRAME>(parWeighting) * float_X(BETA0_Y) * SPEED_OF_LIGHT, GAMMA0_Z * getMass<FRAME>(parWeighting) * float_X(BETA0_Z) * SPEED_OF_LIGHT ); par[position_] = pos; par[momentum_] = mom; par[multiMask_] = 1; par[localCellIdx_] = linearIdx; par[weighting_] = parWeighting; #if(ENABLE_RADIATION == 1) par[momentumPrev1_] = float3_X(0.f, 0.f, 0.f); #if(RAD_MARK_PARTICLE>1) || (RAD_ACTIVATE_GAMMA_FILTER!=0) /*this code tree is only passed if we not select any particle*/ par[radiationFlag_] = true; #endif #endif } }
__global__ void kernelParticleDensity(ParBox pb, DataBox<PitchedBox<Type_, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) { isValid = true; isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<DIM3> particleCellId(DataSpaceOperations<DIM3>::template map<Block > (cellIdx)); uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { image(imageCell) = (Type_) counter(localCell); } }
__global__ void kernelAddOneParticle(ParBox pb, DataSpace<simDim> superCell, DataSpace<simDim> parLocalCell) { typedef typename ParBox::FrameType FRAME; FRAME *frame; int linearIdx = DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (parLocalCell); float_X parWeighting = NUM_EL_PER_PARTICLE; frame = &(pb.getEmptyFrame()); pb.setAsLastFrame(*frame, superCell); // many particle loop: for (unsigned i = 0; i < 1; ++i) { PMACC_AUTO(par, (*frame)[i]); /** we now initialize all attributes of the new particle to their default values * some attributes, such as the position, localCellIdx, weighting or the * multiMask (\see AttrToIgnore) of the particle will be set individually * in the following lines since they are already known at this point. */ { typedef typename ParBox::FrameType FrameType; typedef typename FrameType::ValueTypeSeq ParticleAttrList; typedef bmpl::vector4<position<>, multiMask, localCellIdx, weighting> AttrToIgnore; typedef typename ResolveAndRemoveFromSeq<ParticleAttrList, AttrToIgnore>::type ParticleCleanedAttrList; algorithms::forEach::ForEach<ParticleCleanedAttrList, SetAttributeToDefault<bmpl::_1> > setToDefault; setToDefault(forward(par)); } float3_X pos = float3_X(0.5, 0.5, 0.5); const float_X GAMMA0 = (float_X) (1.0 / sqrt(1.0 - (BETA0_X * BETA0_X + BETA0_Y * BETA0_Y + BETA0_Z * BETA0_Z))); float3_X mom = float3_X( GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_X) * SPEED_OF_LIGHT, GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_Y) * SPEED_OF_LIGHT, GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_Z) * SPEED_OF_LIGHT ); par[position_] = pos; par[momentum_] = mom; par[multiMask_] = 1; par[localCellIdx_] = linearIdx; par[weighting_] = parWeighting; #if(ENABLE_RADIATION == 1) #if(RAD_MARK_PARTICLE>1) || (RAD_ACTIVATE_GAMMA_FILTER!=0) par[radiationFlag_] = true; #endif #endif } }
__global__ void kernelPaintParticles3D(ParBox pb, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx #if(SIMDIM==DIM3) uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) #endif { atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/ isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx)); #if(SIMDIM==DIM3) uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) #endif { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again. * BUT: since we are interested in a simple value between 0 and 1, * we stay with this number (normalized to the order of macro * particles) and devide by the number of typical macro particles * per cell */ float_X value = counter(localCell) / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE; if (value > 1.0) value = 1.0; //image(imageCell).x() = value; visPreview::preParticleDensCol::addRGB(image(imageCell), value, visPreview::preParticleDens_opacity); // cut to [0, 1] if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0); if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0); if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0); if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0); if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0); if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0); } }