/** Write one species to the HDF5 output.
 *
 * Steps performed by this functor call:
 *  - count the particles of ThisSpecies with countOnDevice<CORE + BORDER>,
 *    using the offset and size stored in domInfo
 *  - allocate host memory for every attribute in Hdf5FrameType::ValueTypeSeq
 *    (called "mapped memory" by the log messages)
 *  - if there is at least one particle, launch the copySpecies kernel and
 *    wait until the particle counter is back on the host
 *  - hand every attribute to hdf5::ParticleAttribute and free the host memory
 *
 * @param params wrapped thread params; the cell description is read from it
 *               and the wrapper itself is forwarded to the attribute writers
 * @param prefix string prepended to FrameType::getName() to build the name
 *               passed to the attribute writers
 * @param domInfo provides localDomainOffset and domainSize for counting and
 *                for the position filter; also forwarded to the writers
 * @param particleOffset offset passed to the copy kernel, relative to the
 *                       data domain (not to the physical domain)
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params,
                        std::string prefix,
                        const DomainInformation domInfo,
                        const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT > ("HDF5: write species: %1%") % Hdf5FrameType::getName();
    DataConnector &dc = DataConnector::getInstance();
    /* load particle without copying particle data to host */
    ThisSpecies* speciesTmp = &(dc.getData<ThisSpecies >(ThisSpecies::FrameType::CommunicationTag, true));

    /* count total number of particles on the device */
    uint64_cu totalNumParticles = 0;

    log<picLog::INPUT_OUTPUT > ("HDF5: count particles: %1%") % Hdf5FrameType::getName();
    totalNumParticles = PMacc::CountParticles::countOnDevice < CORE + BORDER > (
        *speciesTmp,
        *(params.get()->cellDescription),
        domInfo.localDomainOffset,
        domInfo.domainSize);

    log<picLog::INPUT_OUTPUT > ("HDF5: Finish count particles: %1% = %2%") % Hdf5FrameType::getName() % totalNumParticles;

    Hdf5FrameType hostFrame;
    log<picLog::INPUT_OUTPUT > ("HDF5: malloc mapped memory: %1%") % Hdf5FrameType::getName();
    /* malloc mapped memory; this runs even for zero particles because
     * hostFrame is handed to the writers and to FreeMemory unconditionally */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<void> > mallocMem;
    mallocMem(byRef(hostFrame), totalNumParticles);
    log<picLog::INPUT_OUTPUT > ("HDF5: Finish malloc mapped memory: %1%") % Hdf5FrameType::getName();

    if (totalNumParticles != 0)
    {
        log<picLog::INPUT_OUTPUT > ("HDF5: get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        /* load device pointer of mapped memory */
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<void> > getDevicePtr;
        getDevicePtr(byRef(deviceFrame), byRef(hostFrame));
        log<picLog::INPUT_OUTPUT > ("HDF5: Finish get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT > ("HDF5: copy particle to host: %1%") % Hdf5FrameType::getName();
        typedef bmpl::vector< PositionFilter3D<> > usedFilters;
        typedef typename FilterFactory<usedFilters>::FilterType MyParticleFilter;
        MyParticleFilter filter;
        /* activate filter pipeline if moving window is activated */
        filter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        filter.setWindowPosition(domInfo.localDomainOffset, domInfo.domainSize);

        dim3 block(TILE_SIZE);

        /* single-element counter written by the kernel; compared against
         * totalNumParticles below */
        GridBuffer<int, DIM1> counterBuffer(DataSpace<DIM1>(1));
        AreaMapping < CORE + BORDER, MappingDesc > mapper(*(params.get()->cellDescription));

        __cudaKernel(copySpecies)
            (mapper.getGridDim(), block)
            (counterBuffer.getDeviceBuffer().getPointer(),
             deviceFrame, speciesTmp->getDeviceParticlesBox(),
             filter,
             particleOffset, /*relative to data domain (not to physical domain)*/
             mapper
             );
        counterBuffer.deviceToHost();
        log<picLog::INPUT_OUTPUT > ("HDF5: memcpy particle counter to host: %1%") % Hdf5FrameType::getName();
        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT > ("HDF5: all events are finish: %1%") % Hdf5FrameType::getName();
        /* this costs a little bit of time but hdf5 writing is slower */
        assert((uint64_cu) counterBuffer.getHostBuffer().getDataBox()[0] == totalNumParticles);
    }

    /* dump to hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<void> > writeToHdf5;
    writeToHdf5(params, byRef(hostFrame), prefix + FrameType::getName(), domInfo, totalNumParticles);

    /* free host memory */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<void> > freeMem;
    freeMem(byRef(hostFrame));
    log<picLog::INPUT_OUTPUT > ("HDF5: Finish write species: %1%") % Hdf5FrameType::getName();
}
/** Load species from HDF5 checkpoint file
 *
 * Reads the "particles_info" table of this species, looks up the row whose
 * scalar position matches this process and derives from it the number of
 * particles to load and their offset (sum of the counts of all preceding
 * rows). If there is at least one particle, the attributes are read into
 * host memory and moved into the species on the device in chunks by the
 * copySpeciesGlobal2Local kernel.
 *
 * The "(begin)" / "( end )" log pair is emitted on every call, including
 * the case that this process has no particles to load.
 *
 * @param params thread params with domainwriter, ...
 * @param restartChunkSize number of particles processed in one kernel call
 */
HINLINE void operator()(ThreadParams* params, const uint32_t restartChunkSize)
{
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) load species: %1%") % Hdf5FrameType::getName();
    DataConnector &dc = Environment<>::get().DataConnector();
    GridController<simDim> &gc = Environment<simDim>::get().GridController();

    std::string subGroup = std::string("particles/") + FrameType::getName();
    const PMacc::Selection<simDim>& localDomain = Environment<simDim>::get().SubGrid().getLocalDomain();

    /* load particle without copying particle data to host */
    ThisSpecies* speciesTmp = &(dc.getData<ThisSpecies >(ThisSpecies::FrameType::getName(), true));

    /* number of particles this process has to load (taken from the table below) */
    uint64_cu totalNumParticles = 0;

    /* load particles info table entry for this process
       particlesInfo is (part-count, scalar pos, x, y, z) */
    typedef uint64_t uint64Quint[5];
    /* NOTE(review): runtime-sized array is a compiler extension, not standard
     * C++; the table is also read before its size is checked by the assert
     * below - consider a heap container sized from the file. */
    uint64Quint particlesInfo[gc.getGlobalSize()];
    Dimensions particlesInfoSizeRead;

    params->dataCollector->read(params->currentStep,
                                (subGroup + std::string("/particles_info")).c_str(),
                                particlesInfoSizeRead,
                                particlesInfo);

    assert(particlesInfoSizeRead[0] == gc.getGlobalSize());

    /* search my entry (using my scalar position) in particlesInfo;
     * the counts of all rows in front of it add up to my offset */
    uint64_t particleOffset = 0;
    uint64_t myScalarPos = gc.getScalarPosition();

    for (size_t i = 0; i < particlesInfoSizeRead[0]; ++i)
    {
        if (particlesInfo[i][1] == myScalarPos)
        {
            totalNumParticles = particlesInfo[i][0];
            break;
        }

        particleOffset += particlesInfo[i][0];
    }

    log<picLog::INPUT_OUTPUT > ("Loading %1% particles from offset %2%") %
        (long long unsigned) totalNumParticles % (long long unsigned) particleOffset;

    if (totalNumParticles != 0)
    {
        Hdf5FrameType hostFrame;
        log<picLog::INPUT_OUTPUT > ("HDF5: malloc mapped memory: %1%") % Hdf5FrameType::getName();
        /* malloc mapped memory */
        ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<bmpl::_1> > mallocMem;
        mallocMem(forward(hostFrame), totalNumParticles);

        log<picLog::INPUT_OUTPUT > ("HDF5: get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        /* load device pointer of mapped memory */
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<bmpl::_1> > getDevicePtr;
        getDevicePtr(forward(deviceFrame), forward(hostFrame));

        /* read every attribute of this process' particles from the file */
        ForEach<typename Hdf5FrameType::ValueTypeSeq, LoadParticleAttributesFromHDF5<bmpl::_1> > loadAttributes;
        loadAttributes(forward(params), forward(hostFrame), subGroup, particleOffset, totalNumParticles);

        /* counter is used to apply for work, count used frames and count loaded particles
         * [0] -> offset for loading particles
         * [1] -> number of loaded particles
         * [2] -> number of used frames
         *
         * all values are zero after initialization
         */
        GridBuffer<uint32_t, DIM1> counterBuffer(DataSpace<DIM1>(3));

        const uint32_t cellsInSuperCell = PMacc::math::CT::volume<SuperCellSize>::type::value;

        const uint32_t iterationsForLoad = ceil(double(totalNumParticles) / double(restartChunkSize));
        /* NOTE(review): the 64 bit particle count is narrowed to 32 bit here
         * and to int in the kernel call below - confirm counts stay in range */
        uint32_t leftOverParticles = totalNumParticles;

        __startAtomicTransaction(__getTransactionEvent());
        for (uint32_t i = 0; i < iterationsForLoad; ++i)
        {
            /* only load a chunk of particles per iteration to avoid blow up of frame usage */
            uint32_t currentChunkSize = std::min(leftOverParticles, restartChunkSize);
            log<picLog::INPUT_OUTPUT > ("HDF5: load particles on device chunk offset=%1%; chunk size=%2%; left particles %3%") %
                (i * restartChunkSize) % currentChunkSize % leftOverParticles;
            __cudaKernel(copySpeciesGlobal2Local)
                (ceil(double(currentChunkSize) / double(cellsInSuperCell)), cellsInSuperCell)
                (counterBuffer.getDeviceBuffer().getDataBox(),
                 speciesTmp->getDeviceParticlesBox(), deviceFrame,
                 (int) totalNumParticles,
                 localDomain.offset, /*relative to data domain (not to physical domain)*/
                 *(params->cellDescription)
                 );
            speciesTmp->fillAllGaps();
            leftOverParticles -= currentChunkSize;
        }
        __setTransactionEvent(__endTransaction());
        counterBuffer.deviceToHost();
        log<picLog::INPUT_OUTPUT > ("HDF5: wait for last processed chunk: %1%") % Hdf5FrameType::getName();
        __getTransactionEvent().waitForFinished();

        log<picLog::INPUT_OUTPUT > ("HDF5: used frames to load particles: %1%") % counterBuffer.getHostBuffer().getDataBox()[2];

        /* report a mismatch in the log before the assert fires, so the
         * numbers are visible even when asserts are compiled out */
        if ((uint64_cu) counterBuffer.getHostBuffer().getDataBox()[1] != totalNumParticles)
        {
            log<picLog::INPUT_OUTPUT >("HDF5: error load species | counter is %1% but should %2%") %
                counterBuffer.getHostBuffer().getDataBox()[1] % totalNumParticles;
        }
        assert((uint64_cu) counterBuffer.getHostBuffer().getDataBox()[1] == totalNumParticles);

        /* free host memory */
        ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<bmpl::_1> > freeMem;
        freeMem(forward(hostFrame));
    }

    /* always close the "(begin)" log entry, also when nothing had to be loaded */
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) load species: %1%") % Hdf5FrameType::getName();
}
/** Write one species and its particle index table entry to the HDF5 file.
 *
 * The particles of ThisSpecies are counted with countOnDevice<CORE + BORDER>,
 * host memory is allocated for every attribute in
 * Hdf5FrameType::ValueTypeSeq, and (if the count is non-zero) the
 * copySpecies kernel fills that memory. Afterwards all attributes, the meta
 * attributes and one "particles_info" row for this process are written.
 *
 * @param params wrapped thread params (cell description, window data,
 *               data collector and current step are read from it)
 * @param subGroup name appended to "particles/<species name>/" to form the
 *                 group the data is written to
 * @param particleOffset offset passed to the copy kernel, relative to the
 *                       data domain (not to the physical domain); also
 *                       stored in the particles_info row
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params,
                        std::string subGroup,
                        const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) write species: %1%") % Hdf5FrameType::getName();

    ThreadParams* const threadParams = params.get();

    /* fetch the species; particle data is not copied to the host here */
    DataConnector &connector = Environment<>::get().DataConnector();
    ThisSpecies* species = &(connector.getData<ThisSpecies >(ThisSpecies::FrameType::getName(), true));

    /* how many particles have to be dumped by this process */
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) count particles: %1%") % Hdf5FrameType::getName();
    uint64_cu particleCount = PMacc::CountParticles::countOnDevice < CORE + BORDER > (
        *species,
        *(threadParams->cellDescription),
        threadParams->localWindowToDomainOffset,
        threadParams->window.localDimensions.size);
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) count particles: %1% = %2%") % Hdf5FrameType::getName() % particleCount;

    /* host side storage for all attributes ("mapped memory") */
    Hdf5FrameType hostData;
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) malloc mapped memory: %1%") % Hdf5FrameType::getName();
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<bmpl::_1> > allocateHost;
    allocateHost(byRef(hostData), particleCount);
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) malloc mapped memory: %1%") % Hdf5FrameType::getName();

    if (particleCount != 0)
    {
        /* device pointers belonging to the host allocation above */
        log<picLog::INPUT_OUTPUT > ("HDF5: (begin) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        Hdf5FrameType deviceData;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<bmpl::_1> > fetchDevicePtr;
        fetchDevicePtr(byRef(deviceData), byRef(hostData));
        log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT > ("HDF5: (begin) copy particle to host: %1%") % Hdf5FrameType::getName();

        /* position filter; only switched on while the sliding window is active */
        typedef bmpl::vector< typename GetPositionFilter<simDim>::type > ActiveFilterSeq;
        typedef typename FilterFactory<ActiveFilterSeq>::FilterType DumpFilter;
        DumpFilter windowFilter;
        windowFilter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        windowFilter.setWindowPosition(threadParams->localWindowToDomainOffset,
                                       threadParams->window.localDimensions.size);

        dim3 threadsPerBlock(TILE_SIZE);

        /* one counter, checked against particleCount once it is back on the host */
        GridBuffer<int, DIM1> copiedCounter(DataSpace<DIM1>(1));
        AreaMapping < CORE + BORDER, MappingDesc > areaMapper(*(threadParams->cellDescription));

        __cudaKernel(copySpecies)
            (areaMapper.getGridDim(), threadsPerBlock)
            (copiedCounter.getDeviceBuffer().getPointer(),
             deviceData,
             species->getDeviceParticlesBox(),
             windowFilter,
             particleOffset, /*relative to data domain (not to physical domain)*/
             areaMapper
             );
        copiedCounter.deviceToHost();
        log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) copy particle to host: %1%") % Hdf5FrameType::getName();

        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT > ("HDF5: all events are finished: %1%") % Hdf5FrameType::getName();

        /* costs a little time, but writing the hdf5 file is slower anyway */
        assert(static_cast<uint64_cu>(copiedCounter.getHostBuffer().getDataBox()[0]) == particleCount);
    }

    /* group that receives the attributes and the index table of this species */
    const std::string speciesPath =
        std::string("particles/") + FrameType::getName() + std::string("/") + subGroup;

    /* write every attribute to the hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<bmpl::_1> > dumpAttributes;
    dumpAttributes(params, byRef(hostData), speciesPath, particleCount);

    /* meta attributes of the species */
    writeMetaAttributes(threadParams);

    /* one row per process in the species counter table */
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) writing particle index table for %1%") % Hdf5FrameType::getName();
    {
        ColTypeUInt64_5Array infoColType;
        GridController<simDim>& gridCtrl = Environment<simDim>::get().GridController();

        /* row layout: (num particles, scalar position, particle offset x, y, z) */
        const size_t offsetSlot = 2;
        uint64_t infoRow[5] = {particleCount, gridCtrl.getScalarPosition(), 0, 0, 0};
        for (size_t axis = 0; axis < simDim; ++axis)
        {
            infoRow[offsetSlot + axis] = particleOffset[axis];
        }

        /* the y entry must not end up negative for the top (y) gpus */
        if (gridCtrl.getPosition().y() == 0 || particleOffset[1] < 0)
        {
            infoRow[offsetSlot + 1] = 0;
        }

        threadParams->dataCollector->write(
            threadParams->currentStep,
            Dimensions(gridCtrl.getGlobalSize(), 1, 1),
            Dimensions(gridCtrl.getGlobalRank(), 0, 0),
            infoColType, 1,
            Dimensions(1, 1, 1),
            (speciesPath + std::string("/particles_info")).c_str(),
            infoRow);
    }
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) writing particle index table for %1%") % Hdf5FrameType::getName();

    /* release the host memory again */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<bmpl::_1> > releaseHost;
    releaseHost(byRef(hostData));

    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) writing species: %1%") % Hdf5FrameType::getName();
}