/** Write one particle attribute of a frame to the open ADIOS file.
 *
 * The attribute's values are stored interleaved (component 0..N-1 of
 * element 0, then of element 1, ...). For every component the values are
 * gathered into a contiguous temporary buffer and written with the next
 * variable id taken from the front of `adiosParticleAttrVarIds`; each
 * component therefore consumes exactly one id from that list.
 *
 * @param params   wrapped thread parameters (ADIOS file handle and the
 *                 list of pending attribute variable ids)
 * @param frame    wrapped frame holding the attribute identified by
 *                 T_Identifier
 * @param elements number of attribute entries to write
 */
HINLINE void operator()(
    const RefWrapper<ThreadParams*> params,
    const RefWrapper<FrameType> frame,
    const size_t elements)
{
    typedef T_Identifier Identifier;
    typedef typename Identifier::type ValueType;
    const uint32_t components = GetNComponents<ValueType>::value;
    typedef typename GetComponentsType<ValueType>::type ComponentType;

    log<picLog::INPUT_OUTPUT > ("ADIOS: (begin) write species attribute: %1%") % Identifier::getName();

    ComponentType* tmpBfr = new ComponentType[elements];

    /* the source pointer does not change between components, so fetch it once */
    ValueType* dataPtr = frame.get().getIdentifier(Identifier()).getPointer();
    const ComponentType* const componentData = reinterpret_cast<const ComponentType*>(dataPtr);

    for (uint32_t d = 0; d < components; d++)
    {
        /* copy strided data from source to temporary buffer:
         * component d of element i lives at index d + i * components
         * (the offset d is required, otherwise every pass would write
         * component 0 again) */
        for (size_t i = 0; i < elements; ++i)
        {
            tmpBfr[i] = componentData[d + i * components];
        }

        /* consume the variable id that belongs to this component */
        int64_t adiosAttributeVarId = *(params.get()->adiosParticleAttrVarIds.begin());
        params.get()->adiosParticleAttrVarIds.pop_front();
        ADIOS_CMD(adios_write_byid(params.get()->adiosFileHandle, adiosAttributeVarId, tmpBfr));
    }

    __deleteArray(tmpBfr);

    log<picLog::INPUT_OUTPUT > ("ADIOS: ( end ) write species attribute: %1%") % Identifier::getName();
}
/** Recompute the temporary field from a species and write it to file.
 *
 * Steps, in order: reset the device buffer of FieldTmp, run the solver
 * over CORE + BORDER, start the asynchronous communication and register
 * its event as the current transaction event, copy the field to the host,
 * and finally hand the host data box to Field::writeField.
 *
 * @param params wrapped thread parameters; `currentStep` is read and
 *               `gridLayout` is overwritten with the layout of FieldTmp
 */
HINLINE void operator_impl(RefWrapper<ThreadParams*> params)
{
    DataConnector& connector = Environment<>::get().DataConnector();

    /* ## update field ## */

    /* fetch FieldTmp and the species without copying their data to the host */
    FieldTmp& tmpField = connector.getData<FieldTmp>(FieldTmp::getName(), true);
    Species& species = connector.getData<Species>(Species::FrameType::getName(), true);

    /* start from a zeroed field, then run the algorithm */
    tmpField.getGridBuffer().getDeviceBuffer().setValue(ValueType(0.0));
    tmpField.computeValue<CORE + BORDER, Solver>(species, params.get()->currentStep);

    EventTask communicationEvent = tmpField.asyncCommunication(__getTransactionEvent());
    __setTransactionEvent(communicationEvent);

    /* copy the data to the host so that it can be written to disk */
    tmpField.getGridBuffer().deviceToHost();
    connector.releaseData(Species::FrameType::getName());

    /* ## finish update field ## */

    params.get()->gridLayout = tmpField.getGridLayout();

    /* write data to HDF5 file */
    Field::writeField(
        params.get(),
        getName(),
        getUnit(),
        tmpField.getHostDataBox(),
        ValueType());

    connector.releaseData(FieldTmp::getName());
}
/** Store in `dest` the device-side pointer for the attribute buffer of `src`.
 *
 * The host pointer of attribute T_Type is read from `src`; when it is not
 * NULL it is translated with cudaHostGetDevicePointer. A NULL host pointer
 * yields a NULL device pointer.
 *
 * @param dest wrapped frame whose T_Type entry receives the data box
 * @param src  wrapped frame providing the host pointer
 */
HINLINE void operator()(RefWrapper<ValueType> dest, RefWrapper<ValueType> src) const
{
    type* const hostPtr = src.get().getIdentifier(T_Type()).getPointer();

    type* devicePtr = NULL;
    if (hostPtr != NULL)
    {
        CUDA_CHECK(cudaHostGetDevicePointer(&devicePtr, hostPtr, 0));
    }

    dest.get().getIdentifier(T_Type()) = VectorDataBox<type>(devicePtr);
}
/** Write field T to file (host code only).
 *
 * The body is compiled out when __CUDA_ARCH__ is defined, so the functor
 * is a no-op in device compilation passes.
 *
 * @param params wrapped thread parameters; `gridLayout` is overwritten
 *               with the layout of the field before writing
 */
HDINLINE void operator()(RefWrapper<ThreadParams*> params)
{
#ifndef __CUDA_ARCH__
    DataConnector& connector = Environment<>::get().DataConnector();

    T& fieldData = connector.getData<T>(T::getName());
    params.get()->gridLayout = fieldData.getGridLayout();

    Field::writeField(
        params.get(),
        T::getName(),
        getUnit(),
        fieldData.getHostDataBox(),
        ValueType());

    connector.releaseData(T::getName());
#endif
}
/** Allocate mapped host memory for attribute T_Type and store it in `v1`.
 *
 * Nothing is allocated for `size == 0`; the attribute then holds a data
 * box around a NULL pointer.
 *
 * @param v1   wrapped frame whose T_Type entry receives the data box
 * @param size number of elements of `type` to allocate
 */
HINLINE void operator()(RefWrapper<ValueType> v1, const size_t size) const
{
    type* mappedPtr = NULL;

    /* size is unsigned, so "> 0" is the same test as "!= 0" */
    if (size > 0)
    {
        const size_t numBytes = size * sizeof (type);
        CUDA_CHECK(cudaHostAlloc(&mappedPtr, numBytes, cudaHostAllocMapped));
    }

    v1.get().getIdentifier(T_Type()) = VectorDataBox<type>(mappedPtr);
}
/** Release the host buffer of attribute T_Type held by `value`.
 *
 * A NULL pointer is skipped; the pointer stored in the frame is not reset.
 *
 * @param value wrapped frame whose T_Type buffer is passed to cudaFreeHost
 */
HINLINE void operator()(RefWrapper<ValueType> value) const
{
    type* const hostPtr = value.get().getIdentifier(T_Type()).getPointer();
    if (hostPtr == NULL)
    {
        return;
    }

    CUDA_CHECK(cudaFreeHost(hostPtr));
}
/** Write all particles of one species inside the given domain to HDF5.
 *
 * Sequence: count the particles on the device, allocate mapped host
 * memory for every attribute, let the copySpecies kernel fill it (only
 * when at least one particle exists), dump every attribute to the file
 * and free the host memory again.
 *
 * @param params         wrapped thread parameters (cell description etc.)
 * @param prefix         string prepended to the species name to form the
 *                       dataset name
 * @param domInfo        local domain offset and size used for counting and
 *                       for the position filter
 * @param particleOffset offset handed to the copy kernel, relative to the
 *                       data domain (not to the physical domain)
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params,
                        std::string prefix,
                        const DomainInformation domInfo,
                        const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT>("HDF5: write species: %1%") % Hdf5FrameType::getName();

    DataConnector& connector = DataConnector::getInstance();
    /* load particles without copying particle data to the host */
    ThisSpecies& species = connector.getData<ThisSpecies>(ThisSpecies::FrameType::CommunicationTag, true);

    /* NOTE(review): simBox is not read anywhere below */
    PMACC_AUTO(simBox, SubGrid<simDim>::getInstance().getSimulationBox());

    /* count total number of particles on the device */
    log<picLog::INPUT_OUTPUT>("HDF5: count particles: %1%") % Hdf5FrameType::getName();
    uint64_cu totalNumParticles = PMacc::CountParticles::countOnDevice<CORE + BORDER>(
        species,
        *(params.get()->cellDescription),
        domInfo.localDomainOffset,
        domInfo.domainSize);
    log<picLog::INPUT_OUTPUT>("HDF5: Finish count particles: %1% = %2%")
        % Hdf5FrameType::getName() % totalNumParticles;

    /* malloc mapped memory for every attribute of the frame */
    Hdf5FrameType hostFrame;
    log<picLog::INPUT_OUTPUT>("HDF5: malloc mapped memory: %1%") % Hdf5FrameType::getName();
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<void> > allocateAttributes;
    allocateAttributes(byRef(hostFrame), totalNumParticles);
    log<picLog::INPUT_OUTPUT>("HDF5: Finish malloc mapped memory: %1%") % Hdf5FrameType::getName();

    if (totalNumParticles != 0)
    {
        /* load device pointers of the mapped memory */
        log<picLog::INPUT_OUTPUT>("HDF5: get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<void> > resolveDevicePointers;
        resolveDevicePointers(byRef(deviceFrame), byRef(hostFrame));
        log<picLog::INPUT_OUTPUT>("HDF5: Finish get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT>("HDF5: copy particle to host: %1%") % Hdf5FrameType::getName();
        typedef bmpl::vector< PositionFilter3D<> > FilterList;
        typedef typename FilterFactory<FilterList>::FilterType ParticleFilter;
        ParticleFilter particleFilter;
        /* activate the filter pipeline if the moving window is active */
        particleFilter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        particleFilter.setWindowPosition(domInfo.localDomainOffset, domInfo.domainSize);

        dim3 block(TILE_SIZE);
        /* NOTE(review): superCells is not read anywhere below */
        DataSpace<simDim> superCells = species.getParticlesBuffer().getSuperCellsCount();

        GridBuffer<int, DIM1> counterBuffer(DataSpace<DIM1>(1));
        AreaMapping<CORE + BORDER, MappingDesc> mapper(*(params.get()->cellDescription));

        __cudaKernel(copySpecies)
            (mapper.getGridDim(), block)
            (counterBuffer.getDeviceBuffer().getPointer(),
             deviceFrame,
             species.getDeviceParticlesBox(),
             particleFilter,
             particleOffset, /* relative to data domain (not to physical domain) */
             mapper);
        counterBuffer.deviceToHost();
        log<picLog::INPUT_OUTPUT>("HDF5: memcpy particle counter to host: %1%") % Hdf5FrameType::getName();
        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT>("HDF5: all events are finish: %1%") % Hdf5FrameType::getName();
        /* this costs a little bit of time but hdf5 writing is slower */
        assert((uint64_cu) counterBuffer.getHostBuffer().getDataBox()[0] == totalNumParticles);
    }

    /* dump to hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<void> > writeAttributes;
    writeAttributes(params, byRef(hostFrame), prefix + FrameType::getName(), domInfo, totalNumParticles);

    /* free host memory */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<void> > releaseAttributes;
    releaseAttributes(byRef(hostFrame));

    log<picLog::INPUT_OUTPUT>("HDF5: Finish write species: %1%") % Hdf5FrameType::getName();
}
/** Copy the entry selected by T_Key from `src` into the wrapped `dest`.
 *
 * @param dest wrapped destination object, indexed with T_Key()
 * @param src  source object, indexed with T_Key()
 */
HDINLINE void operator()(RefWrapper<T_T1> dest, const T_T2& src)
{
    dest.get()[T_Key()] = src[T_Key()];
}
/** Write all particles of one species inside the local window to HDF5.
 *
 * Sequence: count the particles on the device, allocate mapped host
 * memory for every attribute, let the copySpecies kernel fill it (only
 * when at least one particle exists), dump every attribute below
 * "particles/<species>/<subGroup>", write the species meta attributes and
 * the per-rank "particles_info" entry, then free the host memory.
 *
 * @param params         wrapped thread parameters (cell description,
 *                       window, data collector, current step)
 * @param subGroup       name of the group below the species group
 * @param particleOffset offset handed to the copy kernel, relative to the
 *                       data domain (not to the physical domain); also
 *                       stored in the particle index table
 */
HINLINE void operator()(RefWrapper<ThreadParams*> params,
                        std::string subGroup,
                        const Space particleOffset)
{
    log<picLog::INPUT_OUTPUT>("HDF5: (begin) write species: %1%") % Hdf5FrameType::getName();

    DataConnector& connector = Environment<>::get().DataConnector();
    /* load particles without copying particle data to the host */
    ThisSpecies& species = connector.getData<ThisSpecies>(ThisSpecies::FrameType::getName(), true);

    /* count total number of particles on the device */
    log<picLog::INPUT_OUTPUT>("HDF5: (begin) count particles: %1%") % Hdf5FrameType::getName();
    uint64_cu totalNumParticles = PMacc::CountParticles::countOnDevice<CORE + BORDER>(
        species,
        *(params.get()->cellDescription),
        params.get()->localWindowToDomainOffset,
        params.get()->window.localDimensions.size);
    log<picLog::INPUT_OUTPUT>("HDF5: ( end ) count particles: %1% = %2%")
        % Hdf5FrameType::getName() % totalNumParticles;

    /* malloc mapped memory for every attribute of the frame */
    Hdf5FrameType hostFrame;
    log<picLog::INPUT_OUTPUT>("HDF5: (begin) malloc mapped memory: %1%") % Hdf5FrameType::getName();
    ForEach<typename Hdf5FrameType::ValueTypeSeq, MallocMemory<bmpl::_1> > allocateAttributes;
    allocateAttributes(byRef(hostFrame), totalNumParticles);
    log<picLog::INPUT_OUTPUT>("HDF5: ( end ) malloc mapped memory: %1%") % Hdf5FrameType::getName();

    if (totalNumParticles != 0)
    {
        /* load device pointers of the mapped memory */
        log<picLog::INPUT_OUTPUT>("HDF5: (begin) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();
        Hdf5FrameType deviceFrame;
        ForEach<typename Hdf5FrameType::ValueTypeSeq, GetDevicePtr<bmpl::_1> > resolveDevicePointers;
        resolveDevicePointers(byRef(deviceFrame), byRef(hostFrame));
        log<picLog::INPUT_OUTPUT>("HDF5: ( end ) get mapped memory device pointer: %1%") % Hdf5FrameType::getName();

        log<picLog::INPUT_OUTPUT>("HDF5: (begin) copy particle to host: %1%") % Hdf5FrameType::getName();
        typedef bmpl::vector< typename GetPositionFilter<simDim>::type > FilterList;
        typedef typename FilterFactory<FilterList>::FilterType ParticleFilter;
        ParticleFilter particleFilter;
        /* activate the filter pipeline if the moving window is active */
        particleFilter.setStatus(MovingWindow::getInstance().isSlidingWindowActive());
        particleFilter.setWindowPosition(params.get()->localWindowToDomainOffset,
                                         params.get()->window.localDimensions.size);

        dim3 block(TILE_SIZE);

        GridBuffer<int, DIM1> counterBuffer(DataSpace<DIM1>(1));
        AreaMapping<CORE + BORDER, MappingDesc> mapper(*(params.get()->cellDescription));

        __cudaKernel(copySpecies)
            (mapper.getGridDim(), block)
            (counterBuffer.getDeviceBuffer().getPointer(),
             deviceFrame,
             species.getDeviceParticlesBox(),
             particleFilter,
             particleOffset, /* relative to data domain (not to physical domain) */
             mapper);
        counterBuffer.deviceToHost();
        log<picLog::INPUT_OUTPUT>("HDF5: ( end ) copy particle to host: %1%") % Hdf5FrameType::getName();
        __getTransactionEvent().waitForFinished();
        log<picLog::INPUT_OUTPUT>("HDF5: all events are finished: %1%") % Hdf5FrameType::getName();
        /* this costs a little bit of time but hdf5 writing is slower */
        assert((uint64_cu) counterBuffer.getHostBuffer().getDataBox()[0] == totalNumParticles);
    }

    /* group that receives both the attributes and the index table */
    const std::string speciesPath =
        std::string("particles/") + FrameType::getName() + std::string("/") + subGroup;

    /* dump to hdf5 file */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, hdf5::ParticleAttribute<bmpl::_1> > writeAttributes;
    writeAttributes(params, byRef(hostFrame), speciesPath, totalNumParticles);

    /* write meta attributes for species */
    writeMetaAttributes(params.get());

    /* write species counter table to hdf5 file */
    log<picLog::INPUT_OUTPUT>("HDF5: (begin) writing particle index table for %1%") % Hdf5FrameType::getName();
    {
        ColTypeUInt64_5Array ctUInt64_5;
        GridController<simDim>& gc = Environment<simDim>::get().GridController();

        /* index of the first offset entry inside particlesMetaInfo */
        const size_t pos_offset = 2;

        /* particlesMetaInfo = (num particles, scalar position, particle offset x, y, z) */
        uint64_t particlesMetaInfo[5] = {totalNumParticles, gc.getScalarPosition(), 0, 0, 0};
        for (size_t d = 0; d < simDim; ++d)
        {
            particlesMetaInfo[pos_offset + d] = particleOffset[d];
        }

        /* prevent that top (y) gpus have a negative value here;
         * index 1 == y */
        if (gc.getPosition().y() == 0 || particleOffset[1] < 0)
        {
            particlesMetaInfo[pos_offset + 1] = 0;
        }

        const std::string infoPath = speciesPath + std::string("/particles_info");
        params.get()->dataCollector->write(
            params.get()->currentStep,
            Dimensions(gc.getGlobalSize(), 1, 1),
            Dimensions(gc.getGlobalRank(), 0, 0),
            ctUInt64_5,
            1,
            Dimensions(1, 1, 1),
            infoPath.c_str(),
            particlesMetaInfo);
    }
    log<picLog::INPUT_OUTPUT>("HDF5: ( end ) writing particle index table for %1%") % Hdf5FrameType::getName();

    /* free host memory */
    ForEach<typename Hdf5FrameType::ValueTypeSeq, FreeMemory<bmpl::_1> > releaseAttributes;
    releaseAttributes(byRef(hostFrame));

    log<picLog::INPUT_OUTPUT>("HDF5: ( end ) writing species: %1%") % Hdf5FrameType::getName();
}