__global__ void kernelPaintParticles3D(ParBox pb, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx #if(SIMDIM==DIM3) uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) #endif { atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/ isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx)); #if(SIMDIM==DIM3) uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) #endif { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again. * BUT: since we are interested in a simple value between 0 and 1, * we stay with this number (normalized to the order of macro * particles) and devide by the number of typical macro particles * per cell */ float_X value = counter(localCell) / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE; if (value > 1.0) value = 1.0; //image(imageCell).x() = value; visPreview::preParticleDensCol::addRGB(image(imageCell), value, visPreview::preParticleDens_opacity); // cut to [0, 1] if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0); if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0); if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0); if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0); if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0); if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0); } }
__global__ void kernelPaintFields( EBox fieldE, BBox fieldB, JBox fieldJ, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, const int slice, const uint32_t globalOffset, const uint32_t sliceDim, Mapping mapper) { typedef typename MappingDesc::SuperCellSize Block; const DataSpace<simDim> threadId(threadIdx); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> cell(block * Block::getDataSpace() + threadId); const DataSpace<simDim> blockOffset((block - mapper.getGuardingSuperCells()) * Block::getDataSpace()); const DataSpace<simDim> realCell(cell - MappingDesc::SuperCellSize::getDataSpace() * mapper.getGuardingSuperCells()); //delete guard from cell idx const DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); const DataSpace<simDim> realCell2(blockOffset + threadId); //delete guard from cell idx #if (SIMDIM==DIM3) uint32_t globalCell = realCell2[sliceDim] + globalOffset; if (globalCell != slice) return; #endif // set fields of this cell to vars typename BBox::ValueType field_b = fieldB(cell); typename EBox::ValueType field_e = fieldE(cell); typename JBox::ValueType field_j = fieldJ(cell); #if(SIMDIM==DIM3) field_j = float3_X( field_j.x() * CELL_HEIGHT * CELL_DEPTH, field_j.y() * CELL_WIDTH * CELL_DEPTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #elif (SIMDIM==DIM2) field_j = float3_X( field_j.x() * CELL_HEIGHT, field_j.y() * CELL_WIDTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #endif // reset picture to black // color range for each RGB channel: [0.0, 1.0] float3_X pic = float3_X(0., 0., 0.); // typical values of the fields to normalize them to [0,1] // pic.x() = visPreview::preChannel1(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()); pic.y() = visPreview::preChannel2(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()); pic.z() = visPreview::preChannel3(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()); //visPreview::preChannel1Col::addRGB(pic, // visPreview::preChannel1(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()), // visPreview::preChannel1_opacity); //visPreview::preChannel2Col::addRGB(pic, // visPreview::preChannel2(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()), // visPreview::preChannel2_opacity); //visPreview::preChannel3Col::addRGB(pic, // visPreview::preChannel3(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()), // visPreview::preChannel3_opacity); // draw to (perhaps smaller) image cell image(imageCell) = pic; }
HDINLINE DataSpace<DIM3> extend(const DataSpace<DIM3> &value) { return DataSpace<DIM3 > (value.x() / z, value.y(), value.x() % z); }
HDINLINE DataSpace<DIM3> reduce(const DataSpace<DIM3> &value) { z = value.z(); return DataSpace<DIM3 > (value.x() * z, value.y(), 1); }
static void writeField(ThreadParams *params, const std::string name, std::vector<float_64> unit, std::vector<float_64> unitDimension, std::vector<std::vector<float_X> > inCellPosition, float_X timeOffset, T_DataBoxType dataBox, const T_ValueType& ) { typedef T_DataBoxType NativeDataBoxType; typedef T_ValueType ValueType; typedef typename GetComponentsType<ValueType>::type ComponentType; typedef typename PICToSplash<ComponentType>::type SplashType; typedef typename PICToSplash<float_X>::type SplashFloatXType; const uint32_t nComponents = GetNComponents<ValueType>::value; SplashType splashType; ColTypeDouble ctDouble; SplashFloatXType splashFloatXType; log<picLog::INPUT_OUTPUT > ("HDF5 write field: %1% %2%") % name % nComponents; /* parameter checking */ PMACC_ASSERT( unit.size() == nComponents ); PMACC_ASSERT( inCellPosition.size() == nComponents ); for( uint32_t n = 0; n < nComponents; ++n ) PMACC_ASSERT( inCellPosition.at(n).size() == simDim ); PMACC_ASSERT(unitDimension.size() == 7); // seven openPMD base units /* component names */ const std::string recordName = std::string("fields/") + name; std::vector<std::string> name_lookup; { const std::string name_lookup_tpl[] = {"x", "y", "z", "w"}; for (uint32_t n = 0; n < nComponents; n++) name_lookup.push_back(name_lookup_tpl[n]); } /*data to describe source buffer*/ GridLayout<simDim> field_layout = params->gridLayout; DataSpace<simDim> field_no_guard = params->window.localDimensions.size; DataSpace<simDim> field_guard = field_layout.getGuard() + params->localWindowToDomainOffset; /* globalSlideOffset due to gpu slides between origin at time step 0 * and origin at current time step * ATTENTION: splash offset are globalSlideOffset + picongpu offsets */ DataSpace<simDim> globalSlideOffset; const PMacc::Selection<simDim>& localDomain = Environment<simDim>::get().SubGrid().getLocalDomain(); const uint32_t numSlides = MovingWindow::getInstance().getSlideCounter(params->currentStep); globalSlideOffset.y() += numSlides * localDomain.size.y(); Dimensions splashGlobalDomainOffset(0, 0, 0); Dimensions splashGlobalOffsetFile(0, 0, 0); Dimensions splashGlobalDomainSize(1, 1, 1); for (uint32_t d = 0; d < simDim; ++d) { splashGlobalOffsetFile[d] = localDomain.offset[d]; splashGlobalDomainOffset[d] = params->window.globalDimensions.offset[d] + globalSlideOffset[d]; splashGlobalDomainSize[d] = params->window.globalDimensions.size[d]; } splashGlobalOffsetFile[1] = std::max(0, localDomain.offset[1] - params->window.globalDimensions.offset[1]); size_t tmpArraySize = field_no_guard.productOfComponents(); ComponentType* tmpArray = new ComponentType[tmpArraySize]; typedef DataBoxDim1Access<NativeDataBoxType > D1Box; D1Box d1Access(dataBox.shift(field_guard), field_no_guard); for (uint32_t n = 0; n < nComponents; n++) { /* copy data to temp array * tmpArray has the size of the data without any offsets */ for (size_t i = 0; i < tmpArraySize; ++i) { tmpArray[i] = d1Access[i][n]; } std::stringstream datasetName; datasetName << recordName; if (nComponents > 1) datasetName << "/" << name_lookup.at(n); Dimensions sizeSrcData(1, 1, 1); for (uint32_t d = 0; d < simDim; ++d) { sizeSrcData[d] = field_no_guard[d]; } params->dataCollector->writeDomain(params->currentStep, /* id == time step */ splashGlobalDomainSize, /* total size of dataset over all processes */ splashGlobalOffsetFile, /* write offset for this process */ splashType, /* data type */ simDim, /* NDims spatial dimensionality of the field */ splash::Selection(sizeSrcData), /* data size of this process */ datasetName.str().c_str(), /* data set name */ splash::Domain( splashGlobalDomainOffset, /* offset of the global domain */ splashGlobalDomainSize /* size of the global domain */ ), DomainCollector::GridType, tmpArray); /* attributes */ params->dataCollector->writeAttribute(params->currentStep, splashFloatXType, datasetName.str().c_str(), "position", 1u, Dimensions(simDim,0,0), &(*inCellPosition.at(n).begin())); params->dataCollector->writeAttribute(params->currentStep, ctDouble, datasetName.str().c_str(), "unitSI", &(unit.at(n))); } __deleteArray(tmpArray); params->dataCollector->writeAttribute(params->currentStep, ctDouble, recordName.c_str(), "unitDimension", 1u, Dimensions(7,0,0), &(*unitDimension.begin())); params->dataCollector->writeAttribute(params->currentStep, splashFloatXType, recordName.c_str(), "timeOffset", &timeOffset); const std::string geometry("cartesian"); ColTypeString ctGeometry(geometry.length()); params->dataCollector->writeAttribute(params->currentStep, ctGeometry, recordName.c_str(), "geometry", geometry.c_str()); const std::string dataOrder("C"); ColTypeString ctDataOrder(dataOrder.length()); params->dataCollector->writeAttribute(params->currentStep, ctDataOrder, recordName.c_str(), "dataOrder", dataOrder.c_str()); char axisLabels[simDim][2]; ColTypeString ctAxisLabels(1); for( uint32_t d = 0; d < simDim; ++d ) { axisLabels[simDim-1-d][0] = char('x' + d); // 3D: F[z][y][x], 2D: F[y][x] axisLabels[simDim-1-d][1] = '\0'; // terminator is important! } params->dataCollector->writeAttribute(params->currentStep, ctAxisLabels, recordName.c_str(), "axisLabels", 1u, Dimensions(simDim,0,0), axisLabels); // cellSize is {x, y, z} but fields are F[z][y][x] std::vector<float_X> gridSpacing(simDim, 0.0); for( uint32_t d = 0; d < simDim; ++d ) gridSpacing.at(simDim-1-d) = cellSize[d]; params->dataCollector->writeAttribute(params->currentStep, splashFloatXType, recordName.c_str(), "gridSpacing", 1u, Dimensions(simDim,0,0), &(*gridSpacing.begin())); // splashGlobalDomainOffset is {x, y, z} but fields are F[z][y][x] std::vector<float_64> gridGlobalOffset(simDim, 0.0); for( uint32_t d = 0; d < simDim; ++d ) gridGlobalOffset.at(simDim-1-d) = float_64(cellSize[d]) * float_64(splashGlobalDomainOffset[d]); params->dataCollector->writeAttribute(params->currentStep, ctDouble, recordName.c_str(), "gridGlobalOffset", 1u, Dimensions(simDim,0,0), &(*gridGlobalOffset.begin())); params->dataCollector->writeAttribute(params->currentStep, ctDouble, recordName.c_str(), "gridUnitSI", &UNIT_LENGTH); const std::string fieldSmoothing("none"); ColTypeString ctFieldSmoothing(fieldSmoothing.length()); params->dataCollector->writeAttribute(params->currentStep, ctFieldSmoothing, recordName.c_str(), "fieldSmoothing", fieldSmoothing.c_str()); }
__global__ void kernelParticleDensity(ParBox pb, DataBox<PitchedBox<Type_, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx #if(SIMDIM==DIM3) uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) #endif { isValid = true; isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle, (*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx)); #if(SIMDIM==DIM3) uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) #endif { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { image(imageCell) = (Type_) counter(localCell); } }