/** Launch `kernelCellwiseOperation` on the area `T_Area` of a field.
 *
 * A cell offset is computed on the host and handed to the kernel together
 * with the two functors.
 *
 * @param field       field whose device data box is passed to the kernel
 * @param opFunctor   forwarded unchanged to the kernel
 * @param valFunctor  forwarded unchanged to the kernel
 * @param currentStep simulation step; used to query the slide counter of the
 *                    moving window and forwarded to the kernel
 * @param enabled     if false, return immediately without launching a kernel
 */
void operator()( T_Field field,
                 T_OpFunctor opFunctor,
                 T_ValFunctor valFunctor,
                 uint32_t currentStep,
                 const bool enabled = true ) const
{
    if( !enabled )
    {
        return;
    }

    const SubGrid<simDim>& grid = Environment<simDim>::get().SubGrid();

    /* begin with the offset of the local domain */
    DataSpace<simDim> cellOffset( grid.getLocalDomain().offset );

    /* Add one local y-extent per slide of the moving window.
     * Assumption: every GPU holds the same number of cells in y. */
    const uint32_t slideCount = MovingWindow::getInstance().getSlideCounter( currentStep );
    cellOffset.y() += slideCount * grid.getLocalDomain().size.y();

    if( T_Area & GUARD )
    {
        /* area contains the GUARD: the first block starts with less offset */
        cellOffset -= m_cellDescription.getSuperCellSize() * m_cellDescription.getGuardingSuperCells();
    }
    else if( T_Area == CORE )
    {
        /* area is the CORE only: skip over the BORDER */
        cellOffset += m_cellDescription.getSuperCellSize() * m_cellDescription.getBorderSuperCells();
    }

    /* kernel launch: one block per supercell, block extent = supercell extent */
    __picKernelArea((kernelCellwiseOperation<T_OpFunctor>), m_cellDescription, T_Area)
        (SuperCellSize::toRT().toDim3())
        (field->getDeviceDataBox(), opFunctor, valFunctor, cellOffset, currentStep);
}
/** Add a single particle in the cell `globalCell`, if that cell lies inside
 *  the local domain of this process.
 *
 * When the cell is outside the local domain the function returns without
 * doing anything. Otherwise `kernelAddOneParticle` is launched, gaps are
 * filled and the call blocks until the transaction event has finished.
 *
 * @param parClass        particle species that receives the particle
 * @param cellDescription mapping description; supplies the number of
 *                        guarding supercells
 * @param globalCell      cell index, given relative to the same origin as
 *                        the local domain offset
 */
static void addOneParticle(ParticlesClass& parClass, MappingDesc cellDescription, DataSpace<simDim> globalCell)
{
    const SubGrid<simDim>& grid = Environment<simDim>::get().SubGrid();
    const DataSpace<simDim> localOrigin = grid.getLocalDomain().offset;
    const DataSpace<simDim> localExtent = grid.getLocalDomain().size;

    // cell index relative to the local domain
    DataSpace<simDim> cellInDomain = globalCell - localOrigin;

    // leave if the cell is not part of the local simulation area
    for (int d = 0; d < (int) simDim; ++d)
    {
        const bool insideLocalDomain = (cellInDomain[d] >= 0) && (cellInDomain[d] < localExtent[d]);
        if (!insideLocalDomain)
        {
            return;
        }
    }

    // split the local cell index into supercell index and cell within the supercell
    DataSpace<simDim> superCellIdx = (cellInDomain / MappingDesc::SuperCellSize::toRT());
    DataSpace<simDim> cellInSuperCell = cellInDomain - (superCellIdx * MappingDesc::SuperCellSize::toRT());

    // shift the supercell index by the guarding supercells
    superCellIdx = superCellIdx + cellDescription.getGuardingSuperCells();

    __cudaKernel(kernelAddOneParticle)
        (1, 1)
        (parClass.getDeviceParticlesBox(), superCellIdx, cellInSuperCell);

    parClass.fillAllGaps();

    std::cout << "Wait for add particle" << std::endl;
    __getTransactionEvent().waitForFinished();
}
/** Run `kernelPositionsParticles` and return the resulting single particle.
 *
 * The device buffer of `gParticle` is reset to a default-constructed
 * particle, the kernel is launched on `AREA`, and the result is copied to
 * the host. Before returning, `globalCellOffset` of host entry 0 is
 * increased by the global offset of the simulation box, shifted in y by
 * (local y size without guards) * (number of window slides).
 *
 * @param currentStep simulation step used to query the virtual window
 * @return copy of entry 0 of the host buffer of `gParticle`
 */
SglParticle<FloatPos> getPositionsParticles(uint32_t currentStep)
{
    typedef typename MappingDesc::SuperCellSize SuperCellSize;

    // reset the device-side result to a default-constructed particle
    SglParticle<FloatPos> emptyParticle;
    gParticle->getDeviceBuffer().setValue(emptyParticle);

    // block extent equals the supercell extent
    dim3 blockExtent(SuperCellSize::getDataSpace());

    __picKernelArea(kernelPositionsParticles, *cellDescription, AREA)
        (blockExtent)
        (particles->getDeviceParticlesBox(),
         gParticle->getDeviceBuffer().getBasePointer());

    gParticle->deviceToHost();

    DataSpace<simDim> sizeWithoutGuard(cellDescription->getGridLayout().getDataSpaceWithoutGuarding());
    VirtualWindow virtualWindow(MovingWindow::getInstance().getVirtualWindow(currentStep));

    // global offset of this process, advanced in y by the slides done so far
    DataSpace<simDim> cellOffset(SubGrid<simDim>::getInstance().getSimulationBox().getGlobalOffset());
    cellOffset.y() += (sizeWithoutGuard.y() * virtualWindow.slides);

    gParticle->getHostBuffer().getDataBox()[0].globalCellOffset += cellOffset;

    return gParticle->getHostBuffer().getDataBox()[0];
}
/** Add a single particle in the cell `globalCell` (3D variant), if that cell
 *  lies inside the local part of the simulation box.
 *
 * When the cell is outside the local area the function returns without
 * doing anything. Otherwise `kernelAddOneParticle` is launched, gaps are
 * filled and the call blocks until the transaction event has finished.
 *
 * @param parClass        particle species that receives the particle
 * @param cellDescription mapping description; supplies the number of
 *                        guarding supercells
 * @param globalCell      cell index, given relative to the same origin as
 *                        the global offset of the simulation box
 */
static void addOneParticle(ParticlesClass& parClass, MappingDesc cellDescription, DataSpace<DIM3> globalCell)
{
    PMACC_AUTO(simulationBox, SubGrid<simDim>::getInstance().getSimulationBox());
    const DataSpace<DIM3> localOrigin = simulationBox.getGlobalOffset();
    const DataSpace<DIM3> localExtent = simulationBox.getLocalSize();

    // cell index relative to the local area
    DataSpace<DIM3> cellInDomain = globalCell - localOrigin;

    // leave if the cell is not part of the local simulation area
    for (int d = 0; d < (int) DIM3; ++d)
    {
        const bool insideLocalArea = (cellInDomain[d] >= 0) && (cellInDomain[d] < localExtent[d]);
        if (!insideLocalArea)
        {
            return;
        }
    }

    // split the local cell index into supercell index and cell within the supercell
    DataSpace<DIM3> superCellIdx = (cellInDomain / MappingDesc::SuperCellSize::getDataSpace());
    DataSpace<DIM3> cellInSuperCell = cellInDomain - (superCellIdx * MappingDesc::SuperCellSize::getDataSpace());

    // shift the supercell index by the guarding supercells
    superCellIdx = superCellIdx + cellDescription.getGuardingSuperCells();

    __cudaKernel(kernelAddOneParticle)
        (1, 1)
        (parClass.getDeviceParticlesBox(), superCellIdx, cellInSuperCell);

    parClass.fillAllGaps();

    std::cout << "Wait for add particle" << std::endl;
    __getTransactionEvent().waitForFinished();
}
void notify(uint32_t currentStep) { typedef typename MappingDesc::SuperCellSize SuperCellSize; DataConnector& dc = Environment<>::get().DataConnector(); fieldE = &(dc.getData<FieldE > (FieldE::getName(), true)); fieldB = &(dc.getData<FieldB > (FieldB::getName(), true)); const int rank = Environment<simDim>::get().GridController().getGlobalRank(); getLineSliceFields < CORE + BORDER > (); const SubGrid<simDim>& subGrid = Environment<simDim>::get().SubGrid(); // number of cells on the current CPU for each direction const DataSpace<simDim> nrOfGpuCells = cellDescription->getGridLayout().getDataSpaceWithoutGuarding(); // global cell id offset (without guardings!) // returns the global id offset of the "first" border cell on a GPU const DataSpace<simDim> globalCellIdOffset(subGrid.getLocalDomain().offset); // global number of cells for whole simulation: local cells on GPU * GPUs // (assumed same size on each gpu :-/ -> todo: provide interface!) //! \todo create a function for: global number of cells for whole simulation //! 
const DataSpace<simDim> globalNrOfCells = subGrid.getGlobalDomain().size; /*FORMAT OUTPUT*/ /** \todo add float3_X with position of the cell to output*/ // check if the current GPU contains the "middle slice" along // X_global / 2; Y_global / 2 over Z if (globalCellIdOffset.x() <= globalNrOfCells.x() / 2 && globalCellIdOffset.x() + nrOfGpuCells.x() > globalNrOfCells.x() / 2) #if(SIMDIM==DIM3) if( globalCellIdOffset.z() <= globalNrOfCells.z() / 2 && globalCellIdOffset.z() + nrOfGpuCells.z() > globalNrOfCells.z() / 2) #endif for (int i = 0; i < nrOfGpuCells.y(); ++i) { const double xPos = double( i + globalCellIdOffset.y()) * SI::CELL_HEIGHT_SI; outfile << currentStep << " " << rank << " "; outfile << xPos << " " /*<< sliceDataField->getHostBuffer().getDataBox()[i] */ << double(sliceDataField->getHostBuffer().getDataBox()[i].x()) * UNIT_EFIELD << " " << double(sliceDataField->getHostBuffer().getDataBox()[i].y()) * UNIT_EFIELD << " " << double(sliceDataField->getHostBuffer().getDataBox()[i].z()) * UNIT_EFIELD << " " << "\n"; } /* outfile << "[ANALYSIS] [" << rank << "] [COUNTER] [LineSliceFields] [" << currentStep << "] " << sliceDataField << "\n"; */ // free line to separate timesteps in gnuplot via the "index" option outfile << std::endl; }