/** * Starts copying data from host to device. */ void hostToDevice() { __startTransaction(__getTransactionEvent()); frames->hostToDevice(); EventTask ev1 = __endTransaction(); __startTransaction(__getTransactionEvent()); superCells->hostToDevice(); EventTask ev2 = __endTransaction(); __startTransaction(__getTransactionEvent()); nextFrames->hostToDevice(); EventTask ev3 = __endTransaction(); __startTransaction(__getTransactionEvent()); prevFrames->hostToDevice(); EventTask ev4 = __endTransaction(); __setTransactionEvent(ev1 + ev2 + ev3 + ev4); }
/**
 * Starts a receive on the given exchange, if that exchange exists.
 *
 * The receive is started inside an atomic transaction that is based on
 * serialEvent combined with the event stored for this exchange. The event
 * returned by startReceive() replaces the stored one.
 *
 * @param serialEvent event the new receive is chained to
 * @param recvEx index of the receive exchange
 * @return the event stored for the started receive, or a default-constructed
 *         EventTask if there is no receive exchange for recvEx
 */
EventTask asyncReceive(EventTask serialEvent, uint32_t recvEx)
{
    if (!hasReceiveExchange(recvEx))
        return EventTask();

    __startAtomicTransaction(serialEvent + receiveEvents[recvEx]);
    receiveEvents[recvEx] = receiveExchanges[recvEx]->startReceive();
    __endTransaction();

    return receiveEvents[recvEx];
}
bool executeIntern() { switch (state) { case Init: break; case WaitForReceive: if (NULL == Environment<>::get().Manager().getITaskIfNotFinished(lastReceiveEvent.getTaskId())) { state = InitInsert; //bash is finished __startTransaction(); lastSize = parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getHostParticlesCurrentSize(); parBase.insertParticles(exchange); // std::cout<<"brecv = "<<parBase.getParticlesBuffer().getReceiveExchangeStack(exchange).getHostCurrentSize()<<std::endl; tmpEvent = __endTransaction(); state = WaitForInsert; } break; case InitInsert: break; case WaitForInsert: if (NULL == Environment<>::get().Manager().getITaskIfNotFinished(tmpEvent.getTaskId())) { state=Wait; assert(lastSize <= maxSize); //check for next bash round if (lastSize == maxSize) { std::cerr << "recv max size " << maxSize << " particles" << std::endl; init(); //call init and run a full send cycle } else { state = Finished; return true; } } break; case Wait: break; case Finished: return true; default: return false; } return false; }
/**
 * Starts a send on the given exchange, if that exchange exists.
 *
 * The send is started inside an atomic transaction that is based on
 * serialEvent combined with the event stored for this exchange. The event
 * returned by startSend() replaces the stored one.
 *
 * @param serialEvent event the new send is chained to
 * @param sendEx index of the send exchange
 * @param gpuFree passed by reference to startSend(); its value after that
 *                call is what this method returns
 * @return gpuFree if the exchange exists, otherwise a default-constructed
 *         EventTask
 */
EventTask asyncSend(EventTask serialEvent, uint32_t sendEx, EventTask &gpuFree)
{
    if (!hasSendExchange(sendEx))
        return EventTask();

    __startAtomicTransaction(serialEvent + sendEvents[sendEx]);
    sendEvents[sendEx] = sendExchanges[sendEx]->startSend(gpuFree);
    __endTransaction();

    /* return only the copy event, because all work on the GPU can run
     * after the data is copied */
    return gpuFree;
}
virtual void init() { state = Init; EventTask serialEvent = __getTransactionEvent(); for (int i = 1; i < Exchanges; ++i) { if (buffer.getGridBuffer().hasSendExchange(i)) { __startAtomicTransaction(serialEvent); FieldFactory::getInstance().createTaskFieldSendExchange(buffer, i); tmpEvent += __endTransaction(); } } state = WaitForSend; }
void shiftParticles() { StrideMapping<AREA, DIM3, MappingDesc> mapper(this->cellDescription); ParticlesBoxType pBox = particlesBuffer->getDeviceParticleBox(); __startTransaction(__getTransactionEvent()); do { __cudaKernel(kernelShiftParticles) (mapper.getGridDim(), TileSize) (pBox, mapper); } while (mapper.next()); __setTransactionEvent(__endTransaction()); }
/**
 * Updates one species and starts its asynchronous communication.
 *
 * Nothing happens if the species' frame type has no particlePusher flag.
 * Otherwise the species is looked up in the tuple, updated inside a
 * transaction based on eventInt, and its asynchronous communication is
 * started from the transaction event that is current after the update.
 *
 * @param tuple storage tuple that is indexed with SpeciesName()
 * @param currentStep step passed on to the species' update()
 * @param eventInt event the update transaction is based on
 * @param updateEvent the event of the update transaction is added to it
 * @param commEvent the event returned by asyncCommunication() is added to it
 */
HINLINE void operator()( T_StorageTuple& tuple, const uint32_t currentStep, const T_Event eventInt, T_Event& updateEvent, T_Event& commEvent ) const
{
    typedef typename HasFlag<FrameType, particlePusher<> >::type hasPusher;

    if (!hasPusher::value)
        return;

    PMACC_AUTO(species, tuple[SpeciesName()]);

    __startTransaction(eventInt);
    species->update(currentStep);
    commEvent += species->asyncCommunication(__getTransactionEvent());
    updateEvent += __endTransaction();
}
bool executeIntern() { switch (state) { case InitDone: break; case DeviceToHostFinished: state = SendDone; __startTransaction(); Environment<>::get().Factory().createTaskSendMPI(exchange, this); __endTransaction(); //we need no blocking because we get a singnal if transaction is finished break; case SendDone: break; case Finish: return true; default: return false; } return false; }
virtual void init() { __startTransaction(); state = InitDone; if (exchange->hasDeviceDoubleBuffer()) { Environment<>::get().Factory().createTaskCopyDeviceToDevice(exchange->getDeviceBuffer(), exchange->getDeviceDoubleBuffer() ); copyEvent = Environment<>::get().Factory().createTaskCopyDeviceToHost(exchange->getDeviceDoubleBuffer(), exchange->getHostBuffer(), this); } else { copyEvent = Environment<>::get().Factory().createTaskCopyDeviceToHost(exchange->getDeviceBuffer(), exchange->getHostBuffer(), this); } __endTransaction(); //we need no blocking because we get a singnal if transaction is finished }
bool executeIntern() { switch (state) { case WaitForReceived: break; case RunCopy: state = WaitForFinish; __startAtomicTransaction(); exchange->getHostBuffer().setCurrentSize(newBufferSize); if (exchange->hasDeviceDoubleBuffer()) { Factory::getInstance().createTaskCopyHostToDevice(exchange->getHostBuffer(), exchange->getDeviceDoubleBuffer()); Factory::getInstance().createTaskCopyDeviceToDevice(exchange->getDeviceDoubleBuffer(), exchange->getDeviceBuffer(), this); } else { Factory::getInstance().createTaskCopyHostToDevice(exchange->getHostBuffer(), exchange->getDeviceBuffer(), this); } __endTransaction(); break; case WaitForFinish: break; case Finish: return true; default: return false; } return false; }
/**
 * Reacts to an event notification.
 *
 * - RECVFINISHED with non-NULL data: the data is treated as an
 *   EventDataReceive, newBufferSize is set to its received count divided by
 *   sizeof(TYPE), the state becomes RunCopy and executeIntern() is called.
 * - COPYHOST2DEVICE / COPYDEVICE2DEVICE: the state becomes Finish.
 * - Any other event type is ignored.
 *
 * @param (unnamed) id_t argument, unused
 * @param type kind of event being signalled
 * @param data event payload; only read for RECVFINISHED
 */
void event(id_t, EventType type, IEventData* data)
{
    switch (type)
    {
        case RECVFINISHED:
        {
            if (data == NULL)
                break;

            __startTransaction(); /* no blocking */
            EventDataReceive* receiveInfo = static_cast<EventDataReceive*> (data);
            // std::cout<<" data rec "<<rdata->getReceivedCount()/sizeof(TYPE)<<std::endl;
            newBufferSize = receiveInfo->getReceivedCount() / sizeof (TYPE);
            __endTransaction();

            state = RunCopy;
            executeIntern();
            break;
        }

        case COPYHOST2DEVICE:
        case COPYDEVICE2DEVICE:
            state = Finish;
            break;

        default:
            return;
    }
}