HDINLINE static DataSpace<DIM3> getBlockIndex(const Base &base, const DataSpace<DIM3>& _blockIdx, uint32_t exchangeType) { DataSpace<DIM3> result(_blockIdx); DataSpace<DIM3> directions = Mask::getRelativeDirections<DIM3 > (exchangeType); if (directions.x() == 0) result.x() += base.getGuardingSuperCells(); else if (directions.x() == 1) result.x() += base.getGridSuperCells().x() - base.getGuardingSuperCells(); if (directions.y() == 0) result.y() += base.getGuardingSuperCells(); else if (directions.y() == 1) result.y() += base.getGridSuperCells().y() - base.getGuardingSuperCells(); if (directions.z() == 0) result.z() += base.getGuardingSuperCells(); else if (directions.z() == 1) result.z() += base.getGridSuperCells().z() - base.getGuardingSuperCells(); return result; }
void notify(uint32_t currentStep) { typedef typename MappingDesc::SuperCellSize SuperCellSize; DataConnector& dc = Environment<>::get().DataConnector(); fieldE = &(dc.getData<FieldE > (FieldE::getName(), true)); fieldB = &(dc.getData<FieldB > (FieldB::getName(), true)); const int rank = Environment<simDim>::get().GridController().getGlobalRank(); getLineSliceFields < CORE + BORDER > (); const SubGrid<simDim>& subGrid = Environment<simDim>::get().SubGrid(); // number of cells on the current CPU for each direction const DataSpace<simDim> nrOfGpuCells = cellDescription->getGridLayout().getDataSpaceWithoutGuarding(); // global cell id offset (without guardings!) // returns the global id offset of the "first" border cell on a GPU const DataSpace<simDim> globalCellIdOffset(subGrid.getLocalDomain().offset); // global number of cells for whole simulation: local cells on GPU * GPUs // (assumed same size on each gpu :-/ -> todo: provide interface!) //! \todo create a function for: global number of cells for whole simulation //! const DataSpace<simDim> globalNrOfCells = subGrid.getGlobalDomain().size; /*FORMAT OUTPUT*/ /** \todo add float3_X with position of the cell to output*/ // check if the current GPU contains the "middle slice" along // X_global / 2; Y_global / 2 over Z if (globalCellIdOffset.x() <= globalNrOfCells.x() / 2 && globalCellIdOffset.x() + nrOfGpuCells.x() > globalNrOfCells.x() / 2) #if(SIMDIM==DIM3) if( globalCellIdOffset.z() <= globalNrOfCells.z() / 2 && globalCellIdOffset.z() + nrOfGpuCells.z() > globalNrOfCells.z() / 2) #endif for (int i = 0; i < nrOfGpuCells.y(); ++i) { const double xPos = double( i + globalCellIdOffset.y()) * SI::CELL_HEIGHT_SI; outfile << currentStep << " " << rank << " "; outfile << xPos << " " /*<< sliceDataField->getHostBuffer().getDataBox()[i] */ << double(sliceDataField->getHostBuffer().getDataBox()[i].x()) * UNIT_EFIELD << " " << double(sliceDataField->getHostBuffer().getDataBox()[i].y()) * UNIT_EFIELD << " " << double(sliceDataField->getHostBuffer().getDataBox()[i].z()) * UNIT_EFIELD << " " << "\n"; } /* outfile << "[ANALYSIS] [" << rank << "] [COUNTER] [LineSliceFields] [" << currentStep << "] " << sliceDataField << "\n"; */ // free line to separate timesteps in gnuplot via the "index" option outfile << std::endl; }
HINLINE static DataSpace<DIM3> getGridDim(const Base &base, const DataSpace<DIM3> &gBlocks) { const int x = gBlocks.x(); const int x_ = gBlocks.x() - 2 * base.getGuardingSuperCells(); const int y = gBlocks.y(); const int z_ = gBlocks.z() - 2 * base.getGuardingSuperCells(); return DataSpace<DIM3 > (x * y + z_ * y + x_*z_, 2 * base.getGuardingSuperCells(), 1); }
HINLINE static DataSpace<DIM2> getGridDim(const Base &base, const DataSpace<DIM2> &gBlocks) { return DataSpace<DIM2 > ( gBlocks.x() + gBlocks.y() - 2 * base.getGuardingSuperCells(), 2 * base.getGuardingSuperCells()); }
ParticleDensity(std::string name, Output output, uint32_t notifyFrequency, DataSpace<DIM2> transpose, float_X slicePoint) : output(output), analyzerName(name), cellDescription(NULL), particleTag(ParticlesType::FrameType::CommunicationTag), notifyFrequency(notifyFrequency), transpose(transpose), slicePoint(slicePoint), isMaster(false) { sliceDim = 0; if (transpose.x() == 0 || transpose.y() == 0) sliceDim = 1; if ((transpose.x() == 1 || transpose.y() == 1) && sliceDim == 1) sliceDim = 2; }
static std::string dspToStr(DataSpace<DIM3>& dsp) { std::stringstream stream; stream << "(" << dsp.x() << ", " << dsp.y() << ", " << dsp.z() << ")"; return stream.str(); }
HINLINE static DataSpace<DIM2> getGridDim(const Base &base, const DataSpace<DIM2> &gBlocks) { const uint32_t xOverhead = 2 * (base.getGuardingSuperCells()); const uint32_t yOverhead = xOverhead + 2 * (base.getBorderSuperCells()); return DataSpace<DIM2 > ( gBlocks.x() - xOverhead + gBlocks.y() - yOverhead, 2 * base.getBorderSuperCells()); }
HDINLINE static DataSpace<DIM3> getBlockIndex(const Base &base, const DataSpace<DIM3>& _blockIdx, uint32_t exchangeType) { DataSpace<DIM3> result(_blockIdx); DataSpace<DIM3> directions = Mask::getRelativeDirections<DIM3 > (exchangeType); size_t guardingBlocks = base.getGuardingSuperCells(); size_t borderBlocks = base.getBorderSuperCells(); switch (directions.x()) { case 0: result.x() += guardingBlocks + borderBlocks; break; case -1: result.x() += guardingBlocks; break; case 1: result.x() += base.getGridSuperCells().x() - guardingBlocks - borderBlocks; break; } switch (directions.y()) { case 0: result.y() += guardingBlocks + borderBlocks; break; case -1: result.y() += guardingBlocks; break; case 1: result.y() += base.getGridSuperCells().y() - guardingBlocks - borderBlocks; break; } switch (directions.z()) { case 0: result.z() += guardingBlocks + borderBlocks; break; case -1: result.z() += guardingBlocks; break; case 1: result.z() += base.getGridSuperCells().z() - guardingBlocks - borderBlocks; break; } return result; }
__global__ void kernelLineSliceFields(E_DataBox fieldE, B_DataBox fieldB, float3_X* sliceDataField, DataSpace<simDim> globalCellIdOffset, DataSpace<simDim> globalNrOfCells, Mapping mapper) { typedef typename Mapping::SuperCellSize SuperCellSize; const DataSpace<simDim > threadIndex(threadIdx); const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex); const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx))); __syncthreads(); // GPU-local cell id with lower GPU-local guarding const DataSpace<simDim> localCell(superCellIdx * SuperCellSize::toRT() + threadIndex); const float3_X b = fieldB(localCell); const float3_X e = fieldE(localCell); // GPU-local cell id without lower GPU-local guarding const DataSpace<simDim> localCellWG( localCell - SuperCellSize::toRT() * mapper.getGuardingSuperCells()); // global cell id const DataSpace<simDim> globalCell = localCellWG + globalCellIdOffset; // slice out one cell along an axis if ((globalCell.x() == globalNrOfCells.x() / 2)) #if(SIMDIM==DIM3) if(globalCell.z() == globalNrOfCells.z() / 2) #endif sliceDataField[localCellWG.y()] = e; __syncthreads(); }
HDINLINE static DataSpace<DIM2> getBlockIndex(const Base &base, const DataSpace<DIM2> &gBlocks, const DataSpace<DIM2>& _blockIdx) { const uint32_t width = gBlocks.x() - 2 * base.getGuardingSuperCells(); if (_blockIdx.x() < width) { return DataSpace<DIM2 > ( base.getGuardingSuperCells() + _blockIdx.x(), base.getGuardingSuperCells() + _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (gBlocks.y() - 2 * base.getGuardingSuperCells() - base.getBorderSuperCells())); /*gridBlocks.y()-2*blocksGuard-blocksBorder*/ } return DataSpace<DIM2 > ( base.getGuardingSuperCells() + _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (gBlocks.x() - base.getBorderSuperCells() - 2 * base.getGuardingSuperCells()), (base.getGuardingSuperCells() + base.getBorderSuperCells()) + _blockIdx.x() - width); }
/*! initializes all processes to build a 3D-grid * * @param nodes number of GPU nodes in each dimension * @param periodic specifying whether the grid is periodic (1) or not (0) in each dimension * * \warning throws invalid argument if cx*cy*cz != totalnodes */ void init(DataSpace<DIM3> numberProcesses, DataSpace<DIM3> periodic) throw (std::invalid_argument) { this->periodic = periodic; //check if parameters are correct MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &mpiSize)); if (numberProcesses.productOfComponents() != mpiSize) { throw std::invalid_argument("wrong parameters or wrong mpirun-call!"); } //1. create Communicator (computing_comm) of computing nodes (ranks 0...n) MPI_Comm computing_comm = MPI_COMM_WORLD; yoffset = 0; // 2. create topology //int dims[3]; dims[0] = numberProcesses.x(); dims[1] = numberProcesses.y(); dims[2] = numberProcesses.z(); topology = MPI_COMM_NULL; int periods[] = {periodic.x(), periodic.y(), periodic.z()}; /*create new communicator based on cartesian coordinates*/ MPI_CHECK(MPI_Cart_create(computing_comm, DIM, dims, periods, 0, &topology)); // 3. update Host rank hostRank = UpdateHostRank(); //4. update Coordinates updateCoordinates(); }
HINLINE static DataSpace<DIM2> getGridDim(const Base &base, uint32_t exchangeType) { DataSpace<DIM2> result(base.getGridSuperCells() - 2 * base.getGuardingSuperCells()); DataSpace<DIM2> directions = Mask::getRelativeDirections<DIM2 > (exchangeType); if (directions.x() != 0) result.x() = base.getGuardingSuperCells(); if (directions.y() != 0) result.y() = base.getGuardingSuperCells(); return result; }
HDINLINE static DataSpace<DIM3> getBlockIndex(const Base &base, const DataSpace<DIM3> &gBlocks, const DataSpace<DIM3>& _blockIdx) { const int g = base.getGuardingSuperCells(); const int b = base.getBorderSuperCells(); const int x = gBlocks.x() - 2 * g; const int x_ = gBlocks.x() - 2 * g - 2 * b; const int y = gBlocks.y() - 2 * g; const int z = gBlocks.z() - 2 * g; const int z_ = gBlocks.z() - 2 * g - 2 * b; if (_blockIdx.x() < (x * y)) { /* area is x*y */ const int tmp_x = _blockIdx.x(); return DataSpace<DIM3 > (tmp_x % x + g, tmp_x / x + g, g + _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (z - b)); } if ((_blockIdx.x() >= (x * y)) && _blockIdx.x() < (x * y + z_ * y)) { /* area is z_*y */ const int tmp_x = _blockIdx.x() - (x * y); return DataSpace<DIM3 > (g + _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (x - b), tmp_x / z_ + g, (tmp_x % z_) + g + b); } /* area is x_*z_ */ const int tmp_x = _blockIdx.x() - (x * y) - (z_ * y); return DataSpace<DIM3 > ((tmp_x % x_) + g + b, g + _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (y - b), (tmp_x / x_) + g + b); }
HDINLINE static DataSpace<DIM3> getBlockIndex(const Base &base, const DataSpace<DIM3> &gBlocks, const DataSpace<DIM3>& _blockIdx) { const int x = gBlocks.x(); const int x_ = gBlocks.x() - 2 * base.getGuardingSuperCells(); const int y = gBlocks.y(); const int z = gBlocks.z(); const int z_ = gBlocks.z() - 2 * base.getGuardingSuperCells(); if (_blockIdx.x() < (x * y)) { /* area is x*y */ const int tmp_x = _blockIdx.x(); return DataSpace<DIM3 > (tmp_x % x, tmp_x / x, _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (z - base.getGuardingSuperCells())); } if ((_blockIdx.x() >= (x * y)) && _blockIdx.x() < (x * y + z_ * y)) { /* area is z_*y */ const int tmp_x = _blockIdx.x() - (x * y); return DataSpace<DIM3 > (_blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (x - base.getGuardingSuperCells()), tmp_x / z_, (tmp_x % z_) + base.getGuardingSuperCells()); } /* area is x_*z_ */ const int tmp_x = _blockIdx.x() - (x * y) - (z_ * y); return DataSpace<DIM3 > ((tmp_x % x_) + base.getGuardingSuperCells(), _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (y - base.getGuardingSuperCells()), (tmp_x / x_) + base.getGuardingSuperCells()); }
HDINLINE static DataSpace<DIM2> getBlockIndex(const Base &base, const DataSpace<DIM2> &gBlocks, const DataSpace<DIM2>& _blockIdx) { if (_blockIdx.x() < gBlocks.x()) { return DataSpace<DIM2 > ( _blockIdx.x(), _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (gBlocks.y() - base.getGuardingSuperCells())); } return DataSpace<DIM2 > ( _blockIdx.y() / 2 + (_blockIdx.y() & 1u) * (gBlocks.x() - base.getGuardingSuperCells()), base.getGuardingSuperCells() + _blockIdx.x() - gBlocks.x()); }
HDINLINE RefValueType operator()(const DataSpace<DIM1> &idx = DataSpace<DIM1>()) { return Base::operator[](idx.x()); }
HDINLINE RefValueType operator()(const DataSpace<DIM2> &idx = DataSpace<DIM2>()) { return (Base::operator[](idx.y()))[idx.x()]; }
__global__ void kernelParticleDensity(ParBox pb, DataBox<PitchedBox<Type_, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) { isValid = true; isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<DIM3> particleCellId(DataSpaceOperations<DIM3>::template map<Block > (cellIdx)); uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { image(imageCell) = (Type_) counter(localCell); } }
HDINLINE DataSpace<DIM3> reduce(const DataSpace<DIM3> &value) { z = value.z(); return DataSpace<DIM3 > (value.x() * z, value.y(), 1); }
void operator()( const Box data, const ValueType unit, const Size2D size, const MessageHeader & header) { if (createFolder) { mkdir((folder).c_str(), 0755); createFolder = false; } std::stringstream step; step << std::setw(6) << std::setfill('0') << header.sim.step; //std::string filename(name + "_" + step.str() + ".bin"); std::string filename(name + "_" + step.str() + ".dat"); double x_cell = header.sim.cellSizeArr[0]; double y_cell = header.sim.cellSizeArr[1]; double x_simOff = header.sim.simOffsetToNull[0]; double y_simOff = header.sim.simOffsetToNull[1]; DataSpace<DIM2> gOffset = header.window.offset; std::ofstream file(filename.c_str(), std::ofstream::out ); //| std::ofstream::binary); typedef std::numeric_limits< ValueType > dbl; file.precision(dbl::digits10); file << std::scientific; ValueType sizex = (int) size.x(); //file.write((char*) (&sizex), sizeof (ValueType)); file << sizex << " "; //first line with y header information for (int x = 0; x < size.x(); ++x) { ValueType cellPos = (ValueType) ((x + x_simOff + gOffset.x()) * x_cell * UNIT_LENGTH); //file.write((char*) &(cellPos), sizeof (ValueType)); file << cellPos << " "; } file << std::endl; //the first column is for x header information for (int y = 0; y < size.y(); ++y) { for (int x = 0; x <= size.x(); ++x) { if (x == 0) { ValueType cellPos = (ValueType) ((y + y_simOff + gOffset.y()) * y_cell * UNIT_LENGTH); //file.write((char*) &(cellPos), sizeof (ValueType)); file << cellPos; } else { const ValueType value = precisionCast<ValueType>(data[y][x]) * unit; /** \info take care, that gnuplots binary matrix does * not support float64 (IEEE float32 only) * \see http://stackoverflow.com/questions/8751154/looking-at-binary-output-from-fortran-on-gnuplot * http://gnuplot.sourceforge.net/docs_4.2/node101.html */ //file.write((char*) &(value), sizeof (ValueType)); file << " " << value; } } file << std::endl; } file.close(); }
HDINLINE DataSpace<DIM3> extend(const DataSpace<DIM3> &value) { return DataSpace<DIM3 > (value.x() / z, value.y(), value.x() % z); }
DINLINE void operator()(DataBoxJ dataBoxJ, const PosType pos, const VelType velocity, const ChargeType charge, const float_X deltaTime) { this->charge = charge; const float3_X deltaPos = float3_X(velocity.x() * deltaTime / cellSize.x(), velocity.y() * deltaTime / cellSize.y(), velocity.z() * deltaTime / cellSize.z()); const PosType oldPos = pos - deltaPos; Line<float3_X> line(oldPos, pos); DataSpace<DIM3> gridShift; /* Define in which direction the particle leaves the cell. * It is not relevant whether the particle leaves the cell via * the positive or negative cell border. * * 0 == stay in cell * 1 == leave cell */ DataSpace<simDim> leaveCell; /* calculate the offset for the virtual coordinate system */ for(int d=0; d<simDim; ++d) { int iStart; int iEnd; constexpr bool isSupportEven = ( supp % 2 == 0 ); RelayPoint< isSupportEven >()( iStart, iEnd, line.m_pos0[d], line.m_pos1[d] ); gridShift[d] = iStart < iEnd ? iStart : iEnd; // integer min function /* particle is leaving the cell */ leaveCell[d] = iStart != iEnd ? 1 : 0; /* shift the particle position to the virtual coordinate system */ line.m_pos0[d] -= gridShift[d]; line.m_pos1[d] -= gridShift[d]; } /* shift current field to the virtual coordinate system */ auto cursorJ = dataBoxJ.shift(gridShift).toCursor(); /** * \brief the following three calls separate the 3D current deposition * into three independent 1D calls, each for one direction and current component. * Therefore the coordinate system has to be rotated so that the z-direction * is always specific. */ using namespace cursor::tools; cptCurrent1D( DataSpace<simDim>(leaveCell.y(),leaveCell.z(),leaveCell.x()), twistVectorFieldAxes<PMacc::math::CT::Int < 1, 2, 0 > >(cursorJ), rotateOrigin < 1, 2, 0 > (line), cellSize.x() ); cptCurrent1D( DataSpace<simDim>(leaveCell.z(),leaveCell.x(),leaveCell.y()), twistVectorFieldAxes<PMacc::math::CT::Int < 2, 0, 1 > >(cursorJ), rotateOrigin < 2, 0, 1 > (line), cellSize.y() ); cptCurrent1D( leaveCell, cursorJ, line, cellSize.z() ); }
__global__ void kernelPaintFields( EBox fieldE, BBox fieldB, JBox fieldJ, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, const int slice, const uint32_t globalOffset, const uint32_t sliceDim, Mapping mapper) { typedef typename MappingDesc::SuperCellSize Block; const DataSpace<simDim> threadId(threadIdx); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> cell(block * Block::getDataSpace() + threadId); const DataSpace<simDim> blockOffset((block - mapper.getGuardingSuperCells()) * Block::getDataSpace()); const DataSpace<simDim> realCell(cell - MappingDesc::SuperCellSize::getDataSpace() * mapper.getGuardingSuperCells()); //delete guard from cell idx const DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); const DataSpace<simDim> realCell2(blockOffset + threadId); //delete guard from cell idx #if (SIMDIM==DIM3) uint32_t globalCell = realCell2[sliceDim] + globalOffset; if (globalCell != slice) return; #endif // set fields of this cell to vars typename BBox::ValueType field_b = fieldB(cell); typename EBox::ValueType field_e = fieldE(cell); typename JBox::ValueType field_j = fieldJ(cell); #if(SIMDIM==DIM3) field_j = float3_X( field_j.x() * CELL_HEIGHT * CELL_DEPTH, field_j.y() * CELL_WIDTH * CELL_DEPTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #elif (SIMDIM==DIM2) field_j = float3_X( field_j.x() * CELL_HEIGHT, field_j.y() * CELL_WIDTH, field_j.z() * CELL_WIDTH * CELL_HEIGHT ); #endif // reset picture to black // color range for each RGB channel: [0.0, 1.0] float3_X pic = float3_X(0., 0., 0.); // typical values of the fields to normalize them to [0,1] // pic.x() = visPreview::preChannel1(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()); pic.y() = visPreview::preChannel2(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()); pic.z() = visPreview::preChannel3(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), field_e / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), field_j / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()); //visPreview::preChannel1Col::addRGB(pic, // visPreview::preChannel1(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()), // visPreview::preChannel1_opacity); //visPreview::preChannel2Col::addRGB(pic, // visPreview::preChannel2(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()), // visPreview::preChannel2_opacity); //visPreview::preChannel3Col::addRGB(pic, // visPreview::preChannel3(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(), // field_e * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(), // field_j * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()), // visPreview::preChannel3_opacity); // draw to (perhaps smaller) image cell image(imageCell) = pic; }
__global__ void kernelPaintParticles3D(ParBox pb, DataBox<PitchedBox<float3_X, DIM2> > image, DataSpace<DIM2> transpose, int slice, uint32_t globalOffset, uint32_t sliceDim, Mapping mapper) { typedef typename ParBox::FrameType FRAME; typedef typename MappingDesc::SuperCellSize Block; __shared__ FRAME *frame; __shared__ bool isValid; __syncthreads(); /*wait that all shared memory is initialised*/ bool isImageThread = false; const DataSpace<simDim> threadId(threadIdx); const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]); const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)); const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace()); int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x; if (localId == 0) isValid = false; __syncthreads(); //\todo: guard size should not be set to (fixed) 1 here const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx #if(SIMDIM==DIM3) uint32_t globalCell = realCell[sliceDim] + globalOffset; if (globalCell == slice) #endif { atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/ isImageThread = true; } __syncthreads(); if (!isValid) return; /*index in image*/ DataSpace<DIM2> imageCell( realCell[transpose.x()], realCell[transpose.y()]); // counter is always DIM2 typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem; extern __shared__ float_X shBlock[]; __syncthreads(); /*wait that all shared memory is initialised*/ const DataSpace<simDim> blockSize(blockDim); SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock, DataSpace<DIM2 > (), blockSize[transpose.x()] * sizeof (float_X))); if (isImageThread) { counter(localCell) = float_X(0.0); } if (localId == 0) { frame = &(pb.getFirstFrame(block, isValid)); } __syncthreads(); while (isValid) //move over all Frames { PMACC_AUTO(particle,(*frame)[localId]); if (particle[multiMask_] == 1) { int cellIdx = particle[localCellIdx_]; // we only draw the first slice of cells in the super cell (z == 0) const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx)); #if(SIMDIM==DIM3) uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim]; if (globalParticleCell == slice) #endif { const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]); atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE); } } __syncthreads(); if (localId == 0) { frame = &(pb.getNextFrame(*frame, isValid)); } __syncthreads(); } if (isImageThread) { /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again. * BUT: since we are interested in a simple value between 0 and 1, * we stay with this number (normalized to the order of macro * particles) and devide by the number of typical macro particles * per cell */ float_X value = counter(localCell) / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE; if (value > 1.0) value = 1.0; //image(imageCell).x() = value; visPreview::preParticleDensCol::addRGB(image(imageCell), value, visPreview::preParticleDens_opacity); // cut to [0, 1] if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0); if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0); if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0); if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0); if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0); if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0); } }
void createImage(uint32_t currentStep, VirtualWindow window) { DataConnector &dc = DataConnector::getInstance(); // Data does not need to be synchronized as visualization is // done at the device. FieldB *fieldB = &(dc.getData<FieldB > (FieldB::getName(), true)); FieldE* fieldE = &(dc.getData<FieldE > (FieldE::getName(), true)); FieldJ* fieldJ = &(dc.getData<FieldJ > (FieldJ::getName(), true)); ParticlesType* particles = &(dc.getData<ParticlesType > (particleTag, true)); PMACC_AUTO(simBox, SubGrid<simDim>::getInstance().getSimulationBox()); uint32_t globalOffset = 0; #if(SIMDIM==DIM3) globalOffset = SubGrid<simDim>::getInstance().getSimulationBox().getGlobalOffset()[sliceDim]; #endif typedef MappingDesc::SuperCellSize SuperCellSize; assert(cellDescription != NULL); //create image fields __picKernelArea((kernelPaintFields), *cellDescription, CORE + BORDER) (SuperCellSize::getDataSpace()) (fieldE->getDeviceDataBox(), fieldB->getDeviceDataBox(), fieldJ->getDeviceDataBox(), img->getDeviceBuffer().getDataBox(), transpose, sliceOffset, globalOffset, sliceDim ); // find maximum for img.x()/y and z and return it as float3_X int elements = img->getGridLayout().getDataSpace().productOfComponents(); //Add one dimension access to 2d DataBox typedef DataBoxDim1Access<typename GridBuffer<float3_X, DIM2 >::DataBoxType> D1Box; D1Box d1access(img->getDeviceBuffer().getDataBox(), img->getGridLayout().getDataSpace()); #if (EM_FIELD_SCALE_CHANNEL1 == -1 || EM_FIELD_SCALE_CHANNEL2 == -1 || EM_FIELD_SCALE_CHANNEL3 == -1) //reduce with functor max float3_X max = reduce(nvidia::functors::Max(), d1access, elements); //reduce with functor min //float3_X min = reduce(nvidia::functors::Min(), // d1access, // elements); #if (EM_FIELD_SCALE_CHANNEL1 != -1 ) max.x() = float_X(1.0); #endif #if (EM_FIELD_SCALE_CHANNEL2 != -1 ) max.y() = float_X(1.0); #endif #if (EM_FIELD_SCALE_CHANNEL3 != -1 ) max.z() = float_X(1.0); #endif //We don't know the superCellSize at compile time // (because of the runtime dimension selection in any analyser), // thus we must use a one dimension kernel and no mapper __cudaKernel(vis_kernels::divideAnyCell)(ceil((double) elements / 256), 256)(d1access, elements, max); #endif // convert channels to RGB __cudaKernel(vis_kernels::channelsToRGB)(ceil((double) elements / 256), 256)(d1access, elements); // add density color channel DataSpace<simDim> blockSize(MappingDesc::SuperCellSize::getDataSpace()); DataSpace<DIM2> blockSize2D(blockSize[transpose.x()], blockSize[transpose.y()]); //create image particles __picKernelArea((kernelPaintParticles3D), *cellDescription, CORE + BORDER) (SuperCellSize::getDataSpace(), blockSize2D.productOfComponents() * sizeof (int)) (particles->getDeviceParticlesBox(), img->getDeviceBuffer().getDataBox(), transpose, sliceOffset, globalOffset, sliceDim ); // send the RGB image back to host img->deviceToHost(); header.update(*cellDescription, window, transpose, currentStep); __getTransactionEvent().waitForFinished(); //wait for copy picture DataSpace<DIM2> size = img->getGridLayout().getDataSpace(); PMACC_AUTO(hostBox, img->getHostBuffer().getDataBox()); if (picongpu::white_box_per_GPU) { hostBox[0 ][0 ] = float3_X(1.0, 1.0, 1.0); hostBox[size.y() - 1 ][0 ] = float3_X(1.0, 1.0, 1.0); hostBox[0 ][size.x() - 1] = float3_X(1.0, 1.0, 1.0); hostBox[size.y() - 1 ][size.x() - 1] = float3_X(1.0, 1.0, 1.0); } PMACC_AUTO(resultBox, gather(hostBox, header)); if (isMaster) { output(resultBox.shift(header.window.offset), header.window.size, header); } }
float3_64 getGlobalCell() const { return float3_64( typeCast<float_64>(globalCellOffset.x()) + typeCast<float_64>(position.x()), typeCast<float_64>(globalCellOffset.y()) + typeCast<float_64>(position.y()), typeCast<float_64>(globalCellOffset.z()) + typeCast<float_64>(position.z()) ); }