__global__ void kernelPositionsParticles(ParticlesBox<FRAME, simDim> pb,
                                         SglParticle<FloatPos>* gParticle,
                                         Mapping mapper)
{

    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/

    typedef typename Mapping::SuperCellSize SuperCellSize;

    const DataSpace<simDim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);
    const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));

    if (linearThreadIdx == 0)
    {
        frame = &(pb.getLastFrame(superCellIdx, isValid));
    }

    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames

    bool isParticle = (*frame)[linearThreadIdx][multiMask_];

    while (isValid)
    {
        if (isParticle)
        {
            PMACC_AUTO(particle,(*frame)[linearThreadIdx]);
            gParticle->position = particle[position_];
            gParticle->momentum = particle[momentum_];
            gParticle->weighting = particle[weighting_];
            gParticle->mass = frame->getMass(gParticle->weighting);
            gParticle->charge = frame->getCharge(gParticle->weighting);
            gParticle->gamma = Gamma<>()(gParticle->momentum, gParticle->mass);

            // storage number in the actual frame
            const lcellId_t frameCellNr = particle[localCellIdx_];

            // offset in the actual superCell = cell offset in the supercell
            const DataSpace<simDim> frameCellOffset(DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (frameCellNr));


            gParticle->globalCellOffset = (superCellIdx - mapper.getGuardingSuperCells())
                * MappingDesc::SuperCellSize::getDataSpace()
                + frameCellOffset;
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(pb.getPreviousFrame(*frame, isValid));
        }
        isParticle = true;
        __syncthreads();
    }

}
예제 #2
0
__global__ void kernelCountParticles(PBox pb,
                                     uint64_cu* gCounter,
                                     Filter filter,
                                     Mapping mapper)
{

    typedef typename PBox::FrameType FRAME;
    const uint32_t Dim = Mapping::Dim;

    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __shared__ int counter;
    __shared__ lcellId_t particlesInSuperCell;


    __syncthreads(); /*wait that all shared memory is initialised*/

    typedef typename Mapping::SuperCellSize SuperCellSize;

    const DataSpace<Dim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<Dim>::template map<SuperCellSize > (threadIndex);
    const DataSpace<Dim> superCellIdx(mapper.getSuperCellIndex(DataSpace<Dim > (blockIdx)));

    if (linearThreadIdx == 0)
    {
        frame = &(pb.getLastFrame(superCellIdx, isValid));
        particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame();
        counter = 0;
    }
    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames
    filter.setSuperCellPosition((superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize());
    while (isValid)
    {
        if (linearThreadIdx < particlesInSuperCell)
        {
            if (filter(*frame, linearThreadIdx))
                atomicAdd(&counter, 1);
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(pb.getPreviousFrame(*frame, isValid));
            particlesInSuperCell = math::CT::volume<SuperCellSize>::type::value;
        }
        __syncthreads();
    }

    __syncthreads();
    if (linearThreadIdx == 0)
    {
        atomicAdd(gCounter, (uint64_cu) counter);
    }
}
예제 #3
0
__global__ void CountMakroParticle(ParBox parBox, CounterBox counterBox, Mapping mapper)
{

    typedef MappingDesc::SuperCellSize SuperCellSize;
    typedef typename ParBox::FrameType FrameType;

    const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));
    /* counterBox has no guarding supercells*/
    const DataSpace<simDim> counterCell = block - mapper.getGuardingSuperCells();

    const DataSpace<simDim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

    __shared__ uint64_cu counterValue;
    __shared__ FrameType *frame;
    __shared__ bool isValid;

    if (linearThreadIdx == 0)
    {
        counterValue = 0;
        frame = &(parBox.getLastFrame(block, isValid));
        if (!isValid)
        {
            counterBox(counterCell) = counterValue;
        }
    }
    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames

    bool isParticle = (*frame)[linearThreadIdx][multiMask_];

    while (isValid)
    {
        if (isParticle)
        {
            atomicAdd(&counterValue, static_cast<uint64_cu> (1LU));
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(parBox.getPreviousFrame(*frame, isValid));
        }
        isParticle = true;
        __syncthreads();
    }

    if (linearThreadIdx == 0)
        counterBox(counterCell) = counterValue;
}
예제 #4
0
    __global__ void
    kernelCellwiseOperation( FieldBox field, T_OpFunctor opFunctor, T_ValFunctor valFunctor, const DataSpace<simDim> totalCellOffset,
        const uint32_t currentStep, Mapping mapper )
    {
        const DataSpace<simDim> block( mapper.getSuperCellIndex( DataSpace<simDim>( blockIdx ) ) );
        const DataSpace<simDim> blockCell = block * MappingDesc::SuperCellSize::toRT();

        const DataSpace<simDim> threadIndex( threadIdx );

        opFunctor( field( blockCell + threadIndex ),
                   valFunctor( blockCell + threadIndex + totalCellOffset,
                               currentStep )
                 );
    }
예제 #5
0
            DINLINE void operator()(BoxReadOnly buffRead,
                                      BoxWriteOnly buffWrite,
                                      uint32_t rule,
                                      Mapping mapper) const
            {
                typedef typename BoxReadOnly::ValueType Type;
                typedef SuperCellDescription<
                        typename Mapping::SuperCellSize,
                        math::CT::Int< 1, 1 >,
                        math::CT::Int< 1, 1 >
                        > BlockArea;
                auto cache = CachedBox::create < 0, Type > (BlockArea());

                const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
                const Space blockCell = block * Mapping::SuperCellSize::toRT();
                const Space threadIndex(threadIdx);
                auto buffRead_shifted = buffRead.shift(blockCell);

                ThreadCollective<BlockArea> collective(threadIndex);

                nvidia::functors::Assign assign;
                collective(
                          assign,
                          cache,
                          buffRead_shifted
                          );
                __syncthreads();

                Type neighbors = 0;
                for (uint32_t i = 1; i < 9; ++i)
                {
                    Space offset(Mask::getRelativeDirections<DIM2 > (i));
                    neighbors += cache(threadIndex + offset);
                }

                Type isLife = cache(threadIndex);
                isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) +
                        (bool)(((isLife)*(1 << (neighbors))) & rule);

                buffWrite(blockCell + threadIndex) = isLife;
            }
예제 #6
0
    __global__ void kernelLineSliceFields(E_DataBox fieldE, B_DataBox fieldB,
    float3_X* sliceDataField,
    DataSpace<simDim> globalCellIdOffset,
    DataSpace<simDim> globalNrOfCells,
    Mapping mapper)
    {

        typedef typename Mapping::SuperCellSize SuperCellSize;

        const DataSpace<simDim > threadIndex(threadIdx);
        const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

        const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));


        __syncthreads();

        // GPU-local cell id with lower GPU-local guarding
        const DataSpace<simDim> localCell(superCellIdx * SuperCellSize::toRT() + threadIndex);

        const float3_X b = fieldB(localCell);
        const float3_X e = fieldE(localCell);

        // GPU-local cell id without lower GPU-local guarding
        const DataSpace<simDim> localCellWG(
                localCell
                - SuperCellSize::toRT() * mapper.getGuardingSuperCells());
        // global cell id
        const DataSpace<simDim> globalCell = localCellWG + globalCellIdOffset;


        // slice out one cell along an axis
        if ((globalCell.x() == globalNrOfCells.x() / 2))
#if(SIMDIM==DIM3)
                if(globalCell.z() == globalNrOfCells.z() / 2)
#endif
            sliceDataField[localCellWG.y()] = e;

        __syncthreads();
    }
예제 #7
0
            DINLINE void operator()(BoxWriteOnly buffWrite,
                                       uint32_t seed,
                                       float fraction,
                                       Mapping mapper) const
            {
                /* get position in grid in units of SuperCells from blockID */
                const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
                /* convert position in unit of cells */
                const Space blockCell = block * Mapping::SuperCellSize::toRT();
                /* convert CUDA dim3 to DataSpace<DIM3> */
                const Space threadIndex(threadIdx);
                const uint32_t cellIdx = DataSpaceOperations<DIM2>::map(
                        mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(),
                        blockCell + threadIndex);

                /* get uniform random number from seed  */
                auto rng = nvidia::rng::create(
                                    nvidia::rng::methods::Xor(seed, cellIdx),
                                    nvidia::rng::distributions::Uniform_float());

                /* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
                buffWrite(blockCell + threadIndex) = (rng() <= fraction);
            }
예제 #8
0
__global__ void
kernelParticleDensity(ParBox pb,
                      DataBox<PitchedBox<Type_, DIM2> > image,
                      DataSpace<DIM2> transpose,
                      int slice,
                      uint32_t globalOffset,
                      uint32_t sliceDim,
                      Mapping mapper)
{
    typedef typename ParBox::FrameType FRAME;
    typedef typename MappingDesc::SuperCellSize Block;
    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/

    bool isImageThread = false;

    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace());


    int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x;


    if (localId == 0)
        isValid = false;
    __syncthreads();

    //\todo: guard size should not be set to (fixed) 1 here
    const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx


    uint32_t globalCell = realCell[sliceDim] + globalOffset;

    if (globalCell == slice)
    {
        isValid = true;
        isImageThread = true;
    }
    __syncthreads();

    if (!isValid)
        return;

    /*index in image*/
    DataSpace<DIM2> imageCell(
                              realCell[transpose.x()],
                              realCell[transpose.y()]);


    // counter is always DIM2
    typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem;
    extern __shared__ float_X shBlock[];
    __syncthreads(); /*wait that all shared memory is initialised*/

    const DataSpace<simDim> blockSize(blockDim);
    SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock,
                                              DataSpace<DIM2 > (),
                                              blockSize[transpose.x()] * sizeof (float_X)));

    if (isImageThread)
    {
        counter(localCell) = float_X(0.0);
    }


    if (localId == 0)
    {
        frame = &(pb.getFirstFrame(block, isValid));
    }
    __syncthreads();

    while (isValid) //move over all Frames
    {
        PMACC_AUTO(particle,(*frame)[localId]);
        if (particle[multiMask_] == 1)
        {
            int cellIdx = particle[localCellIdx_];
            // we only draw the first slice of cells in the super cell (z == 0)
            const DataSpace<DIM3> particleCellId(DataSpaceOperations<DIM3>::template map<Block > (cellIdx));
            uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim];
            if (globalParticleCell == slice)
            {
                const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]);
                atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE);
            }
        }
        __syncthreads();

        if (localId == 0)
        {
            frame = &(pb.getNextFrame(*frame, isValid));
        }
        __syncthreads();
    }


    if (isImageThread)
    {
        image(imageCell) = (Type_) counter(localCell);
    }
}
예제 #9
0
__global__ void
kernelPaintParticles3D(ParBox pb,
                       DataBox<PitchedBox<float3_X, DIM2> > image,
                       DataSpace<DIM2> transpose,
                       int slice,
                       uint32_t globalOffset,
                       uint32_t sliceDim,
                       Mapping mapper)
{
    typedef typename ParBox::FrameType FRAME;
    typedef typename MappingDesc::SuperCellSize Block;
    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/
    bool isImageThread = false;

    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace());


    int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x;


    if (localId == 0)
        isValid = false;
    __syncthreads();

    //\todo: guard size should not be set to (fixed) 1 here
    const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx

#if(SIMDIM==DIM3)
    uint32_t globalCell = realCell[sliceDim] + globalOffset;

    if (globalCell == slice)
#endif
    {
        atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/
        isImageThread = true;
    }
    __syncthreads();

    if (!isValid)
        return;

    /*index in image*/
    DataSpace<DIM2> imageCell(
                              realCell[transpose.x()],
                              realCell[transpose.y()]);


    // counter is always DIM2
    typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem;
    extern __shared__ float_X shBlock[];
    __syncthreads(); /*wait that all shared memory is initialised*/

    const DataSpace<simDim> blockSize(blockDim);
    SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock,
                                                  DataSpace<DIM2 > (),
                                                  blockSize[transpose.x()] * sizeof (float_X)));

    if (isImageThread)
    {
        counter(localCell) = float_X(0.0);
    }


    if (localId == 0)
    {
        frame = &(pb.getFirstFrame(block, isValid));
    }
    __syncthreads();

    while (isValid) //move over all Frames
    {
        PMACC_AUTO(particle,(*frame)[localId]);
        if (particle[multiMask_] == 1)
        {
            int cellIdx = particle[localCellIdx_];
            // we only draw the first slice of cells in the super cell (z == 0)
            const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx));
#if(SIMDIM==DIM3)
            uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim];
            if (globalParticleCell == slice)
#endif
            {
                const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]);
                atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE);
            }
        }
        __syncthreads();

        if (localId == 0)
        {
            frame = &(pb.getNextFrame(*frame, isValid));
        }
        __syncthreads();
    }


    if (isImageThread)
    {
        /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again.
         *  BUT: since we are interested in a simple value between 0 and 1,
         *       we stay with this number (normalized to the order of macro
         *       particles) and devide by the number of typical macro particles
         *       per cell
         */
        float_X value = counter(localCell)
            / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE;
        if (value > 1.0) value = 1.0;

        //image(imageCell).x() = value;
        visPreview::preParticleDensCol::addRGB(image(imageCell),
                                               value,
                                               visPreview::preParticleDens_opacity);

        // cut to [0, 1]
        if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0);
        if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0);
        if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0);
        if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0);
        if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0);
        if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0);
    }
}
예제 #10
0
__global__ void kernelPaintFields(
                                  EBox fieldE,
                                  BBox fieldB,
                                  JBox fieldJ,
                                  DataBox<PitchedBox<float3_X, DIM2> > image,
                                  DataSpace<DIM2> transpose,
                                  const int slice,
                                  const uint32_t globalOffset,
                                  const uint32_t sliceDim,
                                  Mapping mapper)
{
    typedef typename MappingDesc::SuperCellSize Block;
    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> cell(block * Block::getDataSpace() + threadId);
    const DataSpace<simDim> blockOffset((block - mapper.getGuardingSuperCells()) * Block::getDataSpace());


    const DataSpace<simDim> realCell(cell - MappingDesc::SuperCellSize::getDataSpace() * mapper.getGuardingSuperCells()); //delete guard from cell idx
    const DataSpace<DIM2> imageCell(
                                    realCell[transpose.x()],
                                    realCell[transpose.y()]);
    const DataSpace<simDim> realCell2(blockOffset + threadId); //delete guard from cell idx

#if (SIMDIM==DIM3)
    uint32_t globalCell = realCell2[sliceDim] + globalOffset;

    if (globalCell != slice)
        return;
#endif
    // set fields of this cell to vars
    typename BBox::ValueType field_b = fieldB(cell);
    typename EBox::ValueType field_e = fieldE(cell);
    typename JBox::ValueType field_j = fieldJ(cell);

#if(SIMDIM==DIM3)
    field_j = float3_X(
                       field_j.x() * CELL_HEIGHT * CELL_DEPTH,
                       field_j.y() * CELL_WIDTH * CELL_DEPTH,
                       field_j.z() * CELL_WIDTH * CELL_HEIGHT
                       );
#elif (SIMDIM==DIM2)
    field_j = float3_X(
                       field_j.x() * CELL_HEIGHT,
                       field_j.y() * CELL_WIDTH,
                       field_j.z() * CELL_WIDTH * CELL_HEIGHT
                       );
#endif

    // reset picture to black
    //   color range for each RGB channel: [0.0, 1.0]
    float3_X pic = float3_X(0., 0., 0.);

    // typical values of the fields to normalize them to [0,1]
    //
    pic.x() = visPreview::preChannel1(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z());
    pic.y() = visPreview::preChannel2(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z());
    pic.z() = visPreview::preChannel3(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z());
    //visPreview::preChannel1Col::addRGB(pic,
    //                                   visPreview::preChannel1(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()),
    //                                   visPreview::preChannel1_opacity);
    //visPreview::preChannel2Col::addRGB(pic,
    //                                   visPreview::preChannel2(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()),
    //                                   visPreview::preChannel2_opacity);
    //visPreview::preChannel3Col::addRGB(pic,
    //                                   visPreview::preChannel3(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()),
    //                                   visPreview::preChannel3_opacity);


    // draw to (perhaps smaller) image cell
    image(imageCell) = pic;
}
예제 #11
0
    DINLINE void operator()(ParBoxIons ionBox,
                                          ParBoxElectrons electronBox,
                                          FrameIonizer frameIonizer,
                                          Mapping mapper) const
    {

        /* "particle box" : container/iterator where the particles live in
         * and where one can get the frame in a super cell from
         */
        typedef typename ParBoxElectrons::FrameType ELECTRONFRAME;
        typedef typename ParBoxIons::FrameType IONFRAME;
        typedef typename ParBoxIons::FramePtr IonFramePtr;
        typedef typename ParBoxElectrons::FramePtr ElectronFramePtr;

        /* specify field to particle interpolation scheme */
        typedef typename PMacc::traits::Resolve<
            typename GetFlagType<IONFRAME,interpolation<> >::type
        >::type InterpolationScheme;

        /* margins around the supercell for the interpolation of the field on the cells */
        typedef typename GetMargin<InterpolationScheme>::LowerMargin LowerMargin;
        typedef typename GetMargin<InterpolationScheme>::UpperMargin UpperMargin;

        /* relevant area of a block */
        typedef SuperCellDescription<
            typename MappingDesc::SuperCellSize,
            LowerMargin,
            UpperMargin
            > BlockDescription_;

        /* for not mixing operations::assign up with the nvidia functor assign */
        namespace partOp = PMacc::particles::operations;

        /* definitions for domain variables, like indices of blocks and threads */
        typedef typename BlockDescription_::SuperCellSize SuperCellSize;
        /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */
        const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));

        /* multi-dim vector from origin of the block to a cell in units of cells */
        const DataSpace<simDim > threadIndex(threadIdx);
        /* conversion from a multi-dim cell coordinate to a linear coordinate of the cell in its super cell */
        const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

        /* multi-dim offset from the origin of the local domain on GPU
         * to the origin of the block of the in unit of cells
         */
        const DataSpace<simDim> blockCell = block * SuperCellSize::toRT();

        /* subtract guarding cells to only have the simulation volume */
        const DataSpace<simDim> localCellIndex = (block * SuperCellSize::toRT() + threadIndex) - mapper.getGuardingSuperCells() * SuperCellSize::toRT();

        /* typedef for the functor that writes new macro electrons into electron frames during runtime */
        typedef typename particles::ionization::WriteElectronIntoFrame WriteElectronIntoFrame;


        PMACC_SMEM( ionFrame, IonFramePtr );
        PMACC_SMEM( electronFrame,ElectronFramePtr );
        PMACC_SMEM( maxParticlesInFrame, lcellId_t );


        /* find last frame in super cell
         * define maxParticlesInFrame as the maximum frame size
         */
        if (linearThreadIdx == 0)
        {
            ionFrame = ionBox.getLastFrame(block);
            maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value;
        }

        __syncthreads();
        if (!ionFrame.isValid())
            return; //end kernel if we have no frames

        /* caching of E- and B- fields and initialization of random generator if needed */
        frameIonizer.init(blockCell, linearThreadIdx, localCellIndex);

        /* Declare counter in shared memory that will later tell the current fill level or
         * occupation of the newly created target electron frames.
         */
        PMACC_SMEM( newFrameFillLvl, int );

        /* Declare local variable oldFrameFillLvl for each thread */
        int oldFrameFillLvl;

        /* Initialize local (register) counter for each thread
         * - describes how many new macro electrons should be created
         */
        unsigned int newMacroElectrons = 0;

        /* Declare local electron ID
         * - describes at which position in the new frame the new electron is to be created
         */
        int electronId;

        /* Master initializes the frame fill level with 0 */
        if (linearThreadIdx == 0)
        {
            newFrameFillLvl = 0;
            electronFrame = nullptr;
        }
        __syncthreads();

        /* move over source species frames and call frameIonizer
         * frames are worked on in backwards order to avoid asking if there is another frame
         * --> performance
         * Because all frames are completely filled except the last and apart from that last frame
         * one wants to make sure that all threads are working and every frame is worked on.
         */
        while (ionFrame.isValid())
        {
            /* casting uint8_t multiMask to boolean */
            const bool isParticle = ionFrame[linearThreadIdx][multiMask_];
            __syncthreads();

            /* < IONIZATION and change of charge states >
             * if the threads contain particles, the frameIonizer can ionize them
             * if they are non-particles their inner ionization counter remains at 0
             */
            if (isParticle)
                /* ionization based on ionization model - this actually increases charge states*/
                frameIonizer(*ionFrame, linearThreadIdx, newMacroElectrons);

            __syncthreads();
            /* always true while-loop over all particles inside source frame until each thread breaks out individually
             *
             * **Attention**: Speaking of 1st and 2nd frame only may seem odd.
             * The question might arise what happens if more electrons are created than would fit into two frames.
             * Well, multi-ionization during a time step is accounted for. The number of new electrons is
             * determined inside the outer loop over the valid frames while in the inner loop each thread can create only ONE
             * new macro electron. But the loop repeats until each thread has created all the electrons needed in the time step.
             */
            while (true)
            {
                /* < INIT >
                 * - electronId is initialized as -1 (meaning: invalid)
                 * - (local) oldFrameFillLvl set equal to (shared) newFrameFillLvl for each thread
                 * --> each thread remembers the old "counter"
                 * - then sync
                 */
                electronId = -1;
                oldFrameFillLvl = newFrameFillLvl;
                __syncthreads();
                /* < CHECK & ADD >
                 * - if a thread wants to create electrons in each cycle it can do that only once
                 * and before that it atomically adds to the shared counter and uses the current
                 * value as electronId in the new frame
                 * - then sync
                 */
                if (newMacroElectrons > 0)
                    electronId = nvidia::atomicAllInc(&newFrameFillLvl);

                __syncthreads();
                /* < EXIT? >
                 * - if the counter hasn't changed all threads break out of the loop */
                if (oldFrameFillLvl == newFrameFillLvl)
                    break;

                __syncthreads();
                /* < FIRST NEW FRAME >
                 * - if there is no frame, yet, the master will create a new target electron frame
                 * and attach it to the back of the frame list
                 * - sync all threads again for them to know which frame to use
                 */
                if (linearThreadIdx == 0)
                {
                    if (!electronFrame.isValid())
                    {
                        electronFrame = electronBox.getEmptyFrame();
                        electronBox.setAsLastFrame(electronFrame, block);
                    }
                }
                __syncthreads();
                /* < CREATE 1 >
                 * - all electrons fitting into the current frame are created there
                 * - internal ionization counter is decremented by 1
                 * - sync
                 */
                if ((0 <= electronId) && (electronId < maxParticlesInFrame))
                {
                    /* each thread makes the attributes of its ion accessible */
                    auto parentIon = (ionFrame[linearThreadIdx]);
                    /* each thread initializes an electron if one should be created */
                    auto targetElectronFull = (electronFrame[electronId]);

                    /* create an electron in the new electron frame:
                     * - see particles/ionization/ionizationMethods.hpp
                     */
                    WriteElectronIntoFrame writeElectron;
                    writeElectron(parentIon,targetElectronFull);

                    newMacroElectrons -= 1;
                }
                __syncthreads();
                /* < SECOND NEW FRAME >
                 * - if the shared counter is larger than the frame size a new electron frame is reserved
                 * and attached to the back of the frame list
                 * - then the shared counter is set back by one frame size
                 * - sync so that every thread knows about the new frame
                 */
                if (linearThreadIdx == 0)
                {
                    if (newFrameFillLvl >= maxParticlesInFrame)
                    {
                        electronFrame = electronBox.getEmptyFrame();
                        electronBox.setAsLastFrame(electronFrame, block);
                        newFrameFillLvl -= maxParticlesInFrame;
                    }
                }
                __syncthreads();
                /* < CREATE 2 >
                 * - if the EID is larger than the frame size
                 *      - the EID is set back by one frame size
                 *      - the thread writes an electron to the new frame
                 *      - the internal counter is decremented by 1
                 */
                if (electronId >= maxParticlesInFrame)
                {
                    electronId -= maxParticlesInFrame;

                    /* each thread makes the attributes of its ion accessible */
                    auto parentIon = ((*ionFrame)[linearThreadIdx]);
                    /* each thread initializes an electron if one should be produced */
                    auto targetElectronFull = (electronFrame[electronId]);

                    /* create an electron in the new electron frame:
                     * - see particles/ionization/ionizationMethods.hpp
                     */
                    WriteElectronIntoFrame writeElectron;
                    writeElectron(parentIon,targetElectronFull);

                    newMacroElectrons -= 1;
                }
                __syncthreads();
            }
            __syncthreads();

            if (linearThreadIdx == 0)
            {
                ionFrame = ionBox.getPreviousFrame(ionFrame);
                maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value;
            }
            __syncthreads();
        }
    } // void kernelIonizeParticles