/** Perform one simulation step and emit the output image.
 *
 * Overlaps guard/border communication with the core update: the guard
 * exchange of 'read' is started asynchronously, the CORE area is computed
 * while the exchange is in flight, then the BORDER area is computed once
 * the exchange has finished. Finally the result is copied to the host,
 * gathered across ranks, and written as a PNG by the master rank.
 *
 * @param currentStep current time step (used to name the PNG output)
 * @param read        buffer holding the current state (input)
 * @param write       buffer receiving the next state (output)
 */
void oneStep(uint32_t currentStep, Buffer* read, Buffer* write)
{
    PMACC_AUTO(splitEvent, __getTransactionEvent());
    /* GridBuffer 'read' will use 'splitEvent' to schedule transaction
     * tasks from the Guard of this local Area to the Borders of the
     * neighboring areas added by 'addExchange'. All transactions in the
     * Transaction Manager will then be done in parallel to the
     * calculations in the core. In order to synchronize the data
     * transfer for the case the core calculation is finished earlier,
     * GridBuffer.asyncComm returns a transaction handle we can check.
     */
    PMACC_AUTO(send, read->asyncCommunication(splitEvent));
    /* update the CORE area; this does not depend on the guard exchange */
    evo.run<CORE>(
        read->getDeviceBuffer().getDataBox(),
        write->getDeviceBuffer().getDataBox()
    );
    /* Join communication with worker tasks, now all next tasks run sequential */
    __setTransactionEvent(send);
    /* Calculate Borders (requires the finished guard exchange above) */
    evo.run<BORDER>(
        read->getDeviceBuffer().getDataBox(),
        write->getDeviceBuffer().getDataBox()
    );
    write->deviceToHost();
    /* gather::operator() gathers all the buffers and assembles those to
     * a complete picture discarding the guards.
     */
    PMACC_AUTO(picture, gather(write->getHostBuffer().getDataBox()));
    PngCreator png;
    if (isMaster)
        png(currentStep, picture, gridSize);
}
/** Functor: compute the summed density of a species group
 *
 * Sums the density contribution of every species in T_SpeciesList into
 * FieldTmp slot 0, communicates the BORDER contributions from neighbor
 * devices, then reduces the summed field per supercell into the
 * guard-less LocalDensity helper field via KernelAverageDensity.
 *
 * @param currentStep the current time step
 * @param speciesGroup naming for the group of species in T_SpeciesList
 */
void operator()(
    uint32_t currentStep,
    std::string const & speciesGroup
)
{
    // generating a density requires at least one slot in FieldTmp
    PMACC_CASSERT_MSG(
        _please_allocate_at_least_one_FieldTmp_in_memory_param,
        fieldTmpNumSlots > 0
    );
    DataConnector &dc = Environment<>::get().DataConnector();

    // load FieldTmp without copy data to host and zero it
    auto fieldTmp = dc.get< FieldTmp >(
        FieldTmp::getUniqueId( 0 ),
        true
    );
    using DensityValueType = typename FieldTmp::ValueType;
    fieldTmp->getGridBuffer().getDeviceBuffer().setValue( DensityValueType::create(0.0) );

    // add density of each species in list to FieldTmp
    ForEach< SpeciesList, detail::AddSingleDensity< bmpl::_1 > > addSingleDensity;
    addSingleDensity( currentStep, forward( fieldTmp ) );

    /* create valid density in the BORDER region
     * note: for average != supercell multiples the GUARD of fieldTmp
     * also needs to be filled in the communication above
     */
    EventTask fieldTmpEvent = fieldTmp->asyncCommunication(__getTransactionEvent());
    __setTransactionEvent(fieldTmpEvent);

    /* average summed density in FieldTmp down to local resolution and
     * write in new field
     */
    auto nlocal = dc.get< LocalDensity >(
        helperFields::LocalDensity::getName( speciesGroup ),
        true
    );
    constexpr uint32_t numWorkers = pmacc::traits::GetNumWorkers<
        pmacc::math::CT::volume< SuperCellSize >::type::value
    >::value;
    PMACC_KERNEL( helperFields::KernelAverageDensity< numWorkers >{ } )
    (
        // one block per averaged density value
        nlocal->getGridBuffer().getGridLayout().getDataSpaceWithoutGuarding(),
        numWorkers
    )
    (
        // start in border (jump over GUARD area)
        fieldTmp->getDeviceDataBox().shift(
            SuperCellSize::toRT() * GuardSize::toRT()
        ),
        // start in border (has no GUARD area)
        nlocal->getGridBuffer().getDeviceBuffer( ).getDataBox( )
    );

    // release fields
    dc.releaseData( FieldTmp::getUniqueId( 0 ) );
    dc.releaseData( helperFields::LocalDensity::getName( speciesGroup ) );
}
/** Enable checking for this task and register it with the Manager.
 *
 * Marks the task as checkable, activates it, hands it to the global task
 * Manager, and sets the current transaction event to this task's event so
 * that subsequently scheduled work is ordered after it.
 */
void activateChecks()
{
    canBeChecked = true;
    this->activate();
    Environment<>::get().Manager().addTask(this);
    __setTransactionEvent(EventTask(this->getId()));
}
/** * Copies data and additional pointers from device to host. */ void deviceToHost() { __startTransaction(__getTransactionEvent()); ringDataSizes->deviceToHost(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); ringData->deviceToHost(); __setTransactionEvent(__endTransaction() + ev1); }
/** * Copies data and additional pointers from host to device. */ void hostToDevice() { __startTransaction(__getTransactionEvent()); ringDataSizes->hostToDevice(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); ringData->hostToDevice(); __setTransactionEvent(__endTransaction() + ev1); }
/** * Copies this data and the RingBuffer data from host to device. */ void hostToDevice() { __startTransaction(__getTransactionEvent()); ringBuffer->hostToDevice(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); GridBuffer<VALUE, DIM1, BORDERVALUE>::hostToDevice(); __setTransactionEvent(__endTransaction() + ev1); }
/** * Copies this data and the RingBuffer data from device to host. */ void deviceToHost() { __startTransaction(__getTransactionEvent()); ringBuffer->deviceToHost(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); GridBuffer<VALUE, DIM1, BORDERVALUE>::deviceToHost(); EventTask ev2 = __endTransaction(); __setTransactionEvent(ev1 + ev2); }
/** Host constructor: prepare density and energy-density fields.
 *
 * Initializes the random number generator member, zeroes FieldTmp slots 0
 * (ion density) and 1 (electron kinetic energy density), fills both via
 * particle-to-grid kernels, communicates/gathers the border contributions,
 * and finally stores device-side databox handles for later kernel use.
 *
 * @param currentStep the current time step (passed to computeValue)
 */
ThomasFermi_Impl(const uint32_t currentStep) : randomGen(RNGFactory::createRandom<Distribution>())
{
    /* create handle for access to host and device data */
    DataConnector &dc = Environment<>::get().DataConnector();

    /* The compiler is allowed to evaluate an expression that does not depend on a template parameter
     * even if the class is never instantiated. In that case static assert is always
     * evaluated (e.g. with clang), this results in an error if the condition is false.
     * http://www.boost.org/doc/libs/1_60_0/doc/html/boost_staticassert.html
     *
     * A workaround is to add a template dependency to the expression.
     * `sizeof(ANY_TYPE) != 0` is always true and defers the evaluation.
     */
    PMACC_CASSERT_MSG(
        _please_allocate_at_least_two_FieldTmp_slots_in_memory_param,
        ( fieldTmpNumSlots >= 2 ) && ( sizeof( T_IonizationAlgorithm ) != 0 )
    );

    /* initialize pointers on host-side density-/energy density field databoxes */
    auto density = dc.get< FieldTmp >( FieldTmp::getUniqueId( 0 ), true );
    auto eneKinDens = dc.get< FieldTmp >( FieldTmp::getUniqueId( 1 ), true );

    /* reset density and kinetic energy values to zero */
    density->getGridBuffer().getDeviceBuffer().setValue( FieldTmp::ValueType( 0. ) );
    eneKinDens->getGridBuffer().getDeviceBuffer().setValue( FieldTmp::ValueType( 0. ) );

    /* load species without copying the particle data to the host */
    auto srcSpecies = dc.get< SrcSpecies >( SrcSpecies::FrameType::getName(), true );

    /* kernel call for weighted ion density calculation */
    density->template computeValue< CORE + BORDER, DensitySolver >(*srcSpecies, currentStep);
    dc.releaseData( SrcSpecies::FrameType::getName() );
    /* exchange border contributions, then gather the full field */
    EventTask densityEvent = density->asyncCommunication( __getTransactionEvent() );
    densityEvent += density->asyncCommunicationGather( densityEvent );

    /* load species without copying the particle data to the host */
    auto destSpecies = dc.get< DestSpecies >( DestSpecies::FrameType::getName(), true );

    /* kernel call for weighted electron energy density calculation */
    eneKinDens->template computeValue< CORE + BORDER, EnergyDensitySolver >(*destSpecies, currentStep);
    dc.releaseData( DestSpecies::FrameType::getName() );
    EventTask eneKinEvent = eneKinDens->asyncCommunication( __getTransactionEvent() );
    eneKinEvent += eneKinDens->asyncCommunicationGather( eneKinEvent );

    /* contributions from neighboring GPUs to our border area */
    __setTransactionEvent( densityEvent + eneKinEvent );

    /* initialize device-side density- and energy density field databox pointers */
    rhoBox = density->getDeviceDataBox();
    eneBox = eneKinDens->getDeviceDataBox();
}
/** Shift particles that left their cell into the correct supercell frames.
 *
 * Re-launches kernelShiftParticles once per offset of the StrideMapping
 * until the whole area is covered (NOTE(review): the stride presumably
 * prevents concurrent kernel instances from touching adjacent supercells
 * — confirm against StrideMapping's documentation). All launches run
 * inside a single transaction.
 */
void shiftParticles()
{
    StrideMapping<AREA, DIM3, MappingDesc> mapper(this->cellDescription);
    ParticlesBoxType pBox = particlesBuffer->getDeviceParticleBox();

    __startTransaction(__getTransactionEvent());
    do
    {
        __cudaKernel(kernelShiftParticles)
            (mapper.getGridDim(), TileSize)
            (pBox, mapper);
    }
    while (mapper.next());
    __setTransactionEvent(__endTransaction());
}
/**
 * Resets all internal buffers.
 *
 * Runs two independent transactions: one resets and refills the frames
 * buffer, the other clears the supercell data on both host and device.
 * The final transaction event joins both.
 */
void reset()
{
    __startTransaction(__getTransactionEvent());
    frames->reset(false);
    frames->initialFillBuffer();
    EventTask ev1 = __endTransaction();

    __startTransaction(__getTransactionEvent());
    superCells->getDeviceBuffer().setValue(SuperCell<vint_t > ());
    superCells->getHostBuffer().setValue(SuperCell<vint_t > ());
    /*nextFrames->getDeviceBuffer().setValue(INV_IDX);//!\todo: is this needed? On device we set any new frame values to INVALID_INDEX
     prevFrames->getDeviceBuffer().setValue(INV_IDX);//!\todo: is this needed? On device we set any new frame values to INVALID_INDEX
     nextFrames->getHostBuffer().setValue(INV_IDX);//!\todo: is this needed? On device we set any new frame values to INVALID_INDEX
     prevFrames->getHostBuffer().setValue(INV_IDX);//!\todo: is this needed? On device we set any new frame values to INVALID_INDEX
     */
    __setTransactionEvent(__endTransaction() + ev1);
}
void setCurrentSize(const size_t size) { // do host and device setCurrentSize parallel EventTask split = __getTransactionEvent(); __startTransaction(split); stackIndexer.getHostBuffer().setCurrentSize(size); stack.getHostBuffer().setCurrentSize(size); EventTask e1 = __endTransaction(); __startTransaction(split); stackIndexer.getDeviceBuffer().setCurrentSize(size); EventTask e2 = __endTransaction(); __startTransaction(split); stack.getDeviceBuffer().setCurrentSize(size); EventTask e3 = __endTransaction(); __setTransactionEvent(e1 + e2 + e3); }
/** * Starts copying data from device to host. */ void deviceToHost() { __startTransaction(__getTransactionEvent()); frames->deviceToHost(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); superCells->deviceToHost(); EventTask ev2 = __endTransaction(); __startTransaction(__getTransactionEvent()); nextFrames->deviceToHost(); EventTask ev3 = __endTransaction(); __startTransaction(__getTransactionEvent()); prevFrames->deviceToHost(); EventTask ev4 = __endTransaction(); __setTransactionEvent(ev1 + ev2 + ev3 + ev4); }
/**
 * Starts syncing data from the own device buffer to neighbor device buffers.
 *
 * Asynchronously starts synchronization of the internal DeviceBuffer using
 * the Exchange buffers registered via addExchange. The current transaction
 * event is updated so following operations run after the communication,
 * while the communication itself proceeds asynchronously.
 *
 * @return event handle of the started communication
 */
EventTask communication()
{
    EventTask commEvent = this->asyncCommunication(__getTransactionEvent());
    __setTransactionEvent(commEvent);
    return commEvent;
}