/**
 * Count particles with a position filter restricted to a window.
 *
 * Builds the position filter selected by GetPositionFilter for Space::Dim,
 * switches it on unconditionally, hands (origin, size) to the filter's
 * setWindowPosition() and forwards to the filter-taking
 * PMacc::CountParticles::countOnDevice<AREA> overload.
 *
 * @param buffer          particle buffer, forwarded unchanged
 * @param cellDescription cell/mapping description, forwarded unchanged
 * @param origin          first argument of the filter's setWindowPosition()
 * @param size            second argument of the filter's setWindowPosition()
 * @return whatever the filter-taking countOnDevice<AREA> overload returns
 */
static uint64_cu countOnDevice(PBuffer& buffer, CellDesc cellDescription, const Space& origin, const Space& size)
{
    typedef typename GetPositionFilter<Space::Dim>::type PositionFilterType;
    typedef typename FilterFactory< bmpl::vector< PositionFilterType > >::FilterType WindowFilter;

    WindowFilter windowFilter;
    /* activate the filter pipeline: counting through this overload always filters */
    windowFilter.setStatus(true);
    windowFilter.setWindowPosition(origin, size);

    return PMacc::CountParticles::countOnDevice<AREA>(buffer, cellDescription, windowFilter);
}
/**
 * Write the particles of one species to HDF5.
 *
 * Steps, in order:
 *  1. count the particles in CORE + BORDER inside the window
 *     (domInfo.localDomainOffset, domInfo.domainSize),
 *  2. allocate one host array per attribute of Hdf5FrameType,
 *  3. if there is at least one particle, launch the copySpecies kernel to
 *     fill these arrays and wait for all pending events,
 *  4. hand the arrays to hdf5::ParticleAttribute for writing,
 *  5. free the host arrays.
 *
 * @param params         wrapped ThreadParams pointer; cellDescription is read
 *                       here and the wrapper itself is forwarded to the writer
 * @param prefix         prepended to FrameType::getName() to form the name
 *                       passed to the attribute writer
 * @param domInfo        provides localDomainOffset and domainSize, used both
 *                       for counting and for the copy filter window; also
 *                       forwarded to the attribute writer
 * @param particleOffset forwarded to the copySpecies kernel; relative to the
 *                       data domain (not to the physical domain)
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params, std::string prefix, const DomainInformation domInfo, const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT > ("HDF5: write species: %1%") % Hdf5FrameType::getName();
    DataConnector &dc = DataConnector::getInstance();
    /* load particle without copy particle data to host */
    ThisSpecies* speciesTmp = &(dc.getData<ThisSpecies >(ThisSpecies::FrameType::CommunicationTag, true));

    /* count total number of particles on the device */
    uint64_cu totalNumParticles = 0;

    log<picLog::INPUT_OUTPUT > ("HDF5: count particles: %1%") % Hdf5FrameType::getName();
    totalNumParticles = PMacc::CountParticles::countOnDevice < CORE + BORDER > (
        *speciesTmp,
        *(params.get()->cellDescription),
        domInfo.localDomainOffset,
        domInfo.domainSize);
    log<picLog::INPUT_OUTPUT > ("HDF5: Finish count particles: %1% = %2%") % Hdf5FrameType::getName() % totalNumParticles;

    Hdf5FrameType hostFrame;
    log<picLog::INPUT_OUTPUT > ("HDF5: malloc mapped memory: %1%") % Hdf5FrameType::getName();
    /* malloc mapped memory: one array per attribute, sized for all counted particles */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<void> > mallocMem;
    mallocMem(byRef(hostFrame), totalNumParticles);
    log<picLog::INPUT_OUTPUT > ("HDF5: Finish malloc mapped memory: %1%") % Hdf5FrameType::getName();

    /* the device-side copy is skipped entirely when there is nothing to copy */
    if (totalNumParticles != 0)
    {
        log<picLog::INPUT_OUTPUT > ("HDF5: get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        /* load device pointer of mapped memory */
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<void> > getDevicePtr;
        getDevicePtr(byRef(deviceFrame), byRef(hostFrame));
        log<picLog::INPUT_OUTPUT > ("HDF5: Finish get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT > ("HDF5: copy particle to host: %1%") % Hdf5FrameType::getName();
        typedef bmpl::vector< PositionFilter3D<> > usedFilters;
        typedef typename FilterFactory<usedFilters>::FilterType MyParticleFilter;
        MyParticleFilter filter;
        /* activate filter pipeline only if the moving window is activated */
        filter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        filter.setWindowPosition(domInfo.localDomainOffset, domInfo.domainSize);

        dim3 block(TILE_SIZE);

        /* single-element counter handed to the kernel and compared with the
         * counted total below.
         * NOTE(review): presumably the kernel accumulates the number of copied
         * particles here and the buffer starts at zero - confirm. */
        GridBuffer<int, DIM1> counterBuffer(DataSpace<DIM1>(1));
        AreaMapping < CORE + BORDER, MappingDesc > mapper(*(params.get()->cellDescription));

        __cudaKernel(copySpecies)
            (mapper.getGridDim(), block)
            (counterBuffer.getDeviceBuffer().getPointer(),
             deviceFrame,
             speciesTmp->getDeviceParticlesBox(),
             filter,
             particleOffset, /*relative to data domain (not to physical domain)*/
             mapper
             );
        counterBuffer.deviceToHost();
        log<picLog::INPUT_OUTPUT > ("HDF5: memcpy particle counter to host: %1%") % Hdf5FrameType::getName();
        /* wait before the host reads the counter and the attribute arrays */
        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT > ("HDF5: all events are finish: %1%") % Hdf5FrameType::getName();
        /* this costs a little bit of time but hdf5 writing is slower;
         * the check is only active in builds where assert() is enabled */
        assert(static_cast<uint64_cu>(counterBuffer.getHostBuffer().getDataBox()[0]) == totalNumParticles);
    }

    /* dump to hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<void> > writeToHdf5;
    writeToHdf5(params, byRef(hostFrame), prefix + FrameType::getName(), domInfo, totalNumParticles);

    /* free host memory */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<void> > freeMem;
    freeMem(byRef(hostFrame));
    log<picLog::INPUT_OUTPUT > ("HDF5: Finish write species: %1%") % Hdf5FrameType::getName();
}
/**
 * Write the particles of one species, plus a per-rank index entry, to HDF5.
 *
 * The particles in CORE + BORDER inside the local window are counted, copied
 * by the copySpecies kernel into per-attribute host arrays and written below
 * "particles/<FrameType name>/<subGroup>". Afterwards the species meta
 * attributes and one row of the "particles_info" table are written and the
 * host arrays are released.
 *
 * @param params         wrapped ThreadParams pointer; reads cellDescription,
 *                       localWindowToDomainOffset, window.localDimensions.size,
 *                       dataCollector and currentStep
 * @param subGroup       last component of the HDF5 group path
 * @param particleOffset forwarded to the copySpecies kernel (relative to the
 *                       data domain, not to the physical domain) and stored in
 *                       the particles_info row
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params, std::string subGroup, const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) write species: %1%") % Hdf5FrameType::getName();

    /* the wrapped pointer is needed many times below, so fetch it once */
    ThreadParams* threadParams = params.get();

    DataConnector &connector = Environment<>::get().DataConnector();
    /* load particle without copy particle data to host */
    ThisSpecies* species = &(connector.getData<ThisSpecies >(ThisSpecies::FrameType::getName(), true));

    /* count total number of particles on the device */
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) count particles: %1%") % Hdf5FrameType::getName();
    uint64_cu totalNumParticles = PMacc::CountParticles::countOnDevice < CORE + BORDER > (
        *species,
        *(threadParams->cellDescription),
        threadParams->localWindowToDomainOffset,
        threadParams->window.localDimensions.size);
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) count particles: %1% = %2%") % Hdf5FrameType::getName() % totalNumParticles;

    /* malloc mapped memory: one host array per attribute of the frame */
    Hdf5FrameType hostFrame;
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) malloc mapped memory: %1%") % Hdf5FrameType::getName();
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<bmpl::_1> > allocateAttributes;
    allocateAttributes(byRef(hostFrame), totalNumParticles);
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) malloc mapped memory: %1%") % Hdf5FrameType::getName();

    /* nothing is copied from the device for an empty species */
    if (totalNumParticles != 0)
    {
        log<picLog::INPUT_OUTPUT > ("HDF5: (begin) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        /* load device pointer of mapped memory */
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<bmpl::_1> > fetchDevicePointers;
        fetchDevicePointers(byRef(deviceFrame), byRef(hostFrame));
        log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT > ("HDF5: (begin) copy particle to host: %1%") % Hdf5FrameType::getName();
        typedef typename GetPositionFilter<simDim>::type PositionFilterType;
        typedef typename FilterFactory< bmpl::vector< PositionFilterType > >::FilterType WindowFilter;
        WindowFilter windowFilter;
        /* activate filter pipeline if moving window is activated */
        windowFilter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        windowFilter.setWindowPosition(threadParams->localWindowToDomainOffset,
                                       threadParams->window.localDimensions.size);

        dim3 threadsPerBlock(TILE_SIZE);

        /* single-element counter handed to the kernel; compared with the
         * counted total once all events have finished */
        GridBuffer<int, DIM1> copiedCounter(DataSpace<DIM1>(1));
        AreaMapping < CORE + BORDER, MappingDesc > areaMapper(*(threadParams->cellDescription));

        __cudaKernel(copySpecies)
            (areaMapper.getGridDim(), threadsPerBlock)
            (copiedCounter.getDeviceBuffer().getPointer(),
             deviceFrame,
             species->getDeviceParticlesBox(),
             windowFilter,
             particleOffset, /*relative to data domain (not to physical domain)*/
             areaMapper
             );
        copiedCounter.deviceToHost();
        log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) copy particle to host: %1%") % Hdf5FrameType::getName();
        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT > ("HDF5: all events are finished: %1%") % Hdf5FrameType::getName();
        /* this costs a little bit of time but hdf5 writing is slower */
        assert((uint64_cu) copiedCounter.getHostBuffer().getDataBox()[0] == totalNumParticles);
    }

    /* HDF5 group shared by the attribute data sets and the index table */
    const std::string speciesGroup =
        std::string("particles/") + FrameType::getName() + std::string("/") + subGroup;

    /* dump to hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<bmpl::_1> > writeAttributes;
    writeAttributes(params, byRef(hostFrame), speciesGroup, totalNumParticles);

    /* write meta attributes for species */
    writeMetaAttributes(threadParams);

    /* write species counter table to hdf5 file */
    log<picLog::INPUT_OUTPUT > ("HDF5: (begin) writing particle index table for %1%") % Hdf5FrameType::getName();
    {
        ColTypeUInt64_5Array metaInfoType;
        GridController<simDim>& gridCtrl = Environment<simDim>::get().GridController();

        /* index of the first offset component inside the row */
        const size_t offsetStart = 2;

        /* particlesMetaInfo = (num particles, scalar position, particle offset x, y, z) */
        uint64_t particlesMetaInfo[5] = {totalNumParticles, gridCtrl.getScalarPosition(), 0, 0, 0};
        for (size_t axis = 0; axis < simDim; ++axis)
        {
            particlesMetaInfo[offsetStart + axis] = particleOffset[axis];
        }

        /* prevent that top (y) gpus have negative value here; index 1 == y */
        if (gridCtrl.getPosition().y() == 0 || particleOffset[1] < 0)
        {
            particlesMetaInfo[offsetStart + 1] = 0;
        }

        const std::string infoTablePath = speciesGroup + std::string("/particles_info");

        /* one row per rank: global extent is the number of ranks, offset is this rank */
        threadParams->dataCollector->write(
            threadParams->currentStep,
            Dimensions(gridCtrl.getGlobalSize(), 1, 1),
            Dimensions(gridCtrl.getGlobalRank(), 0, 0),
            metaInfoType, 1,
            Dimensions(1, 1, 1),
            infoTablePath.c_str(),
            particlesMetaInfo);
    }
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) writing particle index table for %1%") % Hdf5FrameType::getName();

    /* free host memory */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<bmpl::_1> > releaseAttributes;
    releaseAttributes(byRef(hostFrame));
    log<picLog::INPUT_OUTPUT > ("HDF5: ( end ) writing species: %1%") % Hdf5FrameType::getName();
}