__global__ void kernelPositionsParticles(ParticlesBox<FRAME, simDim> pb,
                                         SglParticle<FloatPos>* gParticle,
                                         Mapping mapper)
{

    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/

    typedef typename Mapping::SuperCellSize SuperCellSize;

    const DataSpace<simDim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);
    const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));

    if (linearThreadIdx == 0)
    {
        frame = &(pb.getLastFrame(superCellIdx, isValid));
    }

    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames

    bool isParticle = (*frame)[linearThreadIdx][multiMask_];

    while (isValid)
    {
        if (isParticle)
        {
            PMACC_AUTO(particle,(*frame)[linearThreadIdx]);
            gParticle->position = particle[position_];
            gParticle->momentum = particle[momentum_];
            gParticle->weighting = particle[weighting_];
            gParticle->mass = frame->getMass(gParticle->weighting);
            gParticle->charge = frame->getCharge(gParticle->weighting);
            gParticle->gamma = Gamma<>()(gParticle->momentum, gParticle->mass);

            // storage number in the actual frame
            const lcellId_t frameCellNr = particle[localCellIdx_];

            // offset in the actual superCell = cell offset in the supercell
            const DataSpace<simDim> frameCellOffset(DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (frameCellNr));


            gParticle->globalCellOffset = (superCellIdx - mapper.getGuardingSuperCells())
                * MappingDesc::SuperCellSize::getDataSpace()
                + frameCellOffset;
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(pb.getPreviousFrame(*frame, isValid));
        }
        isParticle = true;
        __syncthreads();
    }

}
__global__ void kernelCountParticles(PBox pb,
                                     uint64_cu* gCounter,
                                     Filter filter,
                                     Mapping mapper)
{

    typedef typename PBox::FrameType FRAME;
    const uint32_t Dim = Mapping::Dim;

    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __shared__ int counter;
    __shared__ lcellId_t particlesInSuperCell;


    __syncthreads(); /*wait that all shared memory is initialised*/

    typedef typename Mapping::SuperCellSize SuperCellSize;

    const DataSpace<Dim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<Dim>::template map<SuperCellSize > (threadIndex);
    const DataSpace<Dim> superCellIdx(mapper.getSuperCellIndex(DataSpace<Dim > (blockIdx)));

    if (linearThreadIdx == 0)
    {
        frame = &(pb.getLastFrame(superCellIdx, isValid));
        particlesInSuperCell = pb.getSuperCell(superCellIdx).getSizeLastFrame();
        counter = 0;
    }
    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames
    filter.setSuperCellPosition((superCellIdx - mapper.getGuardingSuperCells()) * mapper.getSuperCellSize());
    while (isValid)
    {
        if (linearThreadIdx < particlesInSuperCell)
        {
            if (filter(*frame, linearThreadIdx))
                atomicAdd(&counter, 1);
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(pb.getPreviousFrame(*frame, isValid));
            particlesInSuperCell = math::CT::volume<SuperCellSize>::type::value;
        }
        __syncthreads();
    }

    __syncthreads();
    if (linearThreadIdx == 0)
    {
        atomicAdd(gCounter, (uint64_cu) counter);
    }
}
__global__ void CountMakroParticle(ParBox parBox, CounterBox counterBox, Mapping mapper)
{

    typedef MappingDesc::SuperCellSize SuperCellSize;
    typedef typename ParBox::FrameType FrameType;

    const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));
    /* counterBox has no guarding supercells*/
    const DataSpace<simDim> counterCell = block - mapper.getGuardingSuperCells();

    const DataSpace<simDim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

    __shared__ uint64_cu counterValue;
    __shared__ FrameType *frame;
    __shared__ bool isValid;

    if (linearThreadIdx == 0)
    {
        counterValue = 0;
        frame = &(parBox.getLastFrame(block, isValid));
        if (!isValid)
        {
            counterBox(counterCell) = counterValue;
        }
    }
    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames

    bool isParticle = (*frame)[linearThreadIdx][multiMask_];

    while (isValid)
    {
        if (isParticle)
        {
            atomicAdd(&counterValue, static_cast<uint64_cu> (1LU));
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(parBox.getPreviousFrame(*frame, isValid));
        }
        isParticle = true;
        __syncthreads();
    }

    if (linearThreadIdx == 0)
        counterBox(counterCell) = counterValue;
}
Beispiel #4
0
    __global__ void kernelLineSliceFields(E_DataBox fieldE, B_DataBox fieldB,
    float3_X* sliceDataField,
    DataSpace<simDim> globalCellIdOffset,
    DataSpace<simDim> globalNrOfCells,
    Mapping mapper)
    {

        typedef typename Mapping::SuperCellSize SuperCellSize;

        const DataSpace<simDim > threadIndex(threadIdx);
        const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

        const DataSpace<simDim> superCellIdx(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));


        __syncthreads();

        // GPU-local cell id with lower GPU-local guarding
        const DataSpace<simDim> localCell(superCellIdx * SuperCellSize::toRT() + threadIndex);

        const float3_X b = fieldB(localCell);
        const float3_X e = fieldE(localCell);

        // GPU-local cell id without lower GPU-local guarding
        const DataSpace<simDim> localCellWG(
                localCell
                - SuperCellSize::toRT() * mapper.getGuardingSuperCells());
        // global cell id
        const DataSpace<simDim> globalCell = localCellWG + globalCellIdOffset;


        // slice out one cell along an axis
        if ((globalCell.x() == globalNrOfCells.x() / 2))
#if(SIMDIM==DIM3)
                if(globalCell.z() == globalNrOfCells.z() / 2)
#endif
            sliceDataField[localCellWG.y()] = e;

        __syncthreads();
    }
Beispiel #5
0
__global__ void kernelPaintFields(
                                  EBox fieldE,
                                  BBox fieldB,
                                  JBox fieldJ,
                                  DataBox<PitchedBox<float3_X, DIM2> > image,
                                  DataSpace<DIM2> transpose,
                                  const int slice,
                                  const uint32_t globalOffset,
                                  const uint32_t sliceDim,
                                  Mapping mapper)
{
    typedef typename MappingDesc::SuperCellSize Block;
    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> cell(block * Block::getDataSpace() + threadId);
    const DataSpace<simDim> blockOffset((block - mapper.getGuardingSuperCells()) * Block::getDataSpace());


    const DataSpace<simDim> realCell(cell - MappingDesc::SuperCellSize::getDataSpace() * mapper.getGuardingSuperCells()); //delete guard from cell idx
    const DataSpace<DIM2> imageCell(
                                    realCell[transpose.x()],
                                    realCell[transpose.y()]);
    const DataSpace<simDim> realCell2(blockOffset + threadId); //delete guard from cell idx

#if (SIMDIM==DIM3)
    uint32_t globalCell = realCell2[sliceDim] + globalOffset;

    if (globalCell != slice)
        return;
#endif
    // set fields of this cell to vars
    typename BBox::ValueType field_b = fieldB(cell);
    typename EBox::ValueType field_e = fieldE(cell);
    typename JBox::ValueType field_j = fieldJ(cell);

#if(SIMDIM==DIM3)
    field_j = float3_X(
                       field_j.x() * CELL_HEIGHT * CELL_DEPTH,
                       field_j.y() * CELL_WIDTH * CELL_DEPTH,
                       field_j.z() * CELL_WIDTH * CELL_HEIGHT
                       );
#elif (SIMDIM==DIM2)
    field_j = float3_X(
                       field_j.x() * CELL_HEIGHT,
                       field_j.y() * CELL_WIDTH,
                       field_j.z() * CELL_WIDTH * CELL_HEIGHT
                       );
#endif

    // reset picture to black
    //   color range for each RGB channel: [0.0, 1.0]
    float3_X pic = float3_X(0., 0., 0.);

    // typical values of the fields to normalize them to [0,1]
    //
    pic.x() = visPreview::preChannel1(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z());
    pic.y() = visPreview::preChannel2(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z());
    pic.z() = visPreview::preChannel3(field_b / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(),
                                      field_e / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(),
                                      field_j / typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z());
    //visPreview::preChannel1Col::addRGB(pic,
    //                                   visPreview::preChannel1(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL1>::get().z()),
    //                                   visPreview::preChannel1_opacity);
    //visPreview::preChannel2Col::addRGB(pic,
    //                                   visPreview::preChannel2(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL2>::get().z()),
    //                                   visPreview::preChannel2_opacity);
    //visPreview::preChannel3Col::addRGB(pic,
    //                                   visPreview::preChannel3(field_b * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().x(),
    //                                                           field_e * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().y(),
    //                                                           field_j * typicalFields<EM_FIELD_SCALE_CHANNEL3>::get().z()),
    //                                   visPreview::preChannel3_opacity);


    // draw to (perhaps smaller) image cell
    image(imageCell) = pic;
}
Beispiel #6
0
    DINLINE void operator()(ParBoxIons ionBox,
                                          ParBoxElectrons electronBox,
                                          FrameIonizer frameIonizer,
                                          Mapping mapper) const
    {

        /* "particle box" : container/iterator where the particles live in
         * and where one can get the frame in a super cell from
         */
        typedef typename ParBoxElectrons::FrameType ELECTRONFRAME;
        typedef typename ParBoxIons::FrameType IONFRAME;
        typedef typename ParBoxIons::FramePtr IonFramePtr;
        typedef typename ParBoxElectrons::FramePtr ElectronFramePtr;

        /* specify field to particle interpolation scheme */
        typedef typename PMacc::traits::Resolve<
            typename GetFlagType<IONFRAME,interpolation<> >::type
        >::type InterpolationScheme;

        /* margins around the supercell for the interpolation of the field on the cells */
        typedef typename GetMargin<InterpolationScheme>::LowerMargin LowerMargin;
        typedef typename GetMargin<InterpolationScheme>::UpperMargin UpperMargin;

        /* relevant area of a block */
        typedef SuperCellDescription<
            typename MappingDesc::SuperCellSize,
            LowerMargin,
            UpperMargin
            > BlockDescription_;

        /* for not mixing operations::assign up with the nvidia functor assign */
        namespace partOp = PMacc::particles::operations;

        /* definitions for domain variables, like indices of blocks and threads */
        typedef typename BlockDescription_::SuperCellSize SuperCellSize;
        /* multi-dimensional offset vector from local domain origin on GPU in units of super cells */
        const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));

        /* multi-dim vector from origin of the block to a cell in units of cells */
        const DataSpace<simDim > threadIndex(threadIdx);
        /* conversion from a multi-dim cell coordinate to a linear coordinate of the cell in its super cell */
        const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

        /* multi-dim offset from the origin of the local domain on GPU
         * to the origin of the block of the in unit of cells
         */
        const DataSpace<simDim> blockCell = block * SuperCellSize::toRT();

        /* subtract guarding cells to only have the simulation volume */
        const DataSpace<simDim> localCellIndex = (block * SuperCellSize::toRT() + threadIndex) - mapper.getGuardingSuperCells() * SuperCellSize::toRT();

        /* typedef for the functor that writes new macro electrons into electron frames during runtime */
        typedef typename particles::ionization::WriteElectronIntoFrame WriteElectronIntoFrame;


        PMACC_SMEM( ionFrame, IonFramePtr );
        PMACC_SMEM( electronFrame,ElectronFramePtr );
        PMACC_SMEM( maxParticlesInFrame, lcellId_t );


        /* find last frame in super cell
         * define maxParticlesInFrame as the maximum frame size
         */
        if (linearThreadIdx == 0)
        {
            ionFrame = ionBox.getLastFrame(block);
            maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value;
        }

        __syncthreads();
        if (!ionFrame.isValid())
            return; //end kernel if we have no frames

        /* caching of E- and B- fields and initialization of random generator if needed */
        frameIonizer.init(blockCell, linearThreadIdx, localCellIndex);

        /* Declare counter in shared memory that will later tell the current fill level or
         * occupation of the newly created target electron frames.
         */
        PMACC_SMEM( newFrameFillLvl, int );

        /* Declare local variable oldFrameFillLvl for each thread */
        int oldFrameFillLvl;

        /* Initialize local (register) counter for each thread
         * - describes how many new macro electrons should be created
         */
        unsigned int newMacroElectrons = 0;

        /* Declare local electron ID
         * - describes at which position in the new frame the new electron is to be created
         */
        int electronId;

        /* Master initializes the frame fill level with 0 */
        if (linearThreadIdx == 0)
        {
            newFrameFillLvl = 0;
            electronFrame = nullptr;
        }
        __syncthreads();

        /* move over source species frames and call frameIonizer
         * frames are worked on in backwards order to avoid asking if there is another frame
         * --> performance
         * Because all frames are completely filled except the last and apart from that last frame
         * one wants to make sure that all threads are working and every frame is worked on.
         */
        while (ionFrame.isValid())
        {
            /* casting uint8_t multiMask to boolean */
            const bool isParticle = ionFrame[linearThreadIdx][multiMask_];
            __syncthreads();

            /* < IONIZATION and change of charge states >
             * if the threads contain particles, the frameIonizer can ionize them
             * if they are non-particles their inner ionization counter remains at 0
             */
            if (isParticle)
                /* ionization based on ionization model - this actually increases charge states*/
                frameIonizer(*ionFrame, linearThreadIdx, newMacroElectrons);

            __syncthreads();
            /* always true while-loop over all particles inside source frame until each thread breaks out individually
             *
             * **Attention**: Speaking of 1st and 2nd frame only may seem odd.
             * The question might arise what happens if more electrons are created than would fit into two frames.
             * Well, multi-ionization during a time step is accounted for. The number of new electrons is
             * determined inside the outer loop over the valid frames while in the inner loop each thread can create only ONE
             * new macro electron. But the loop repeats until each thread has created all the electrons needed in the time step.
             */
            while (true)
            {
                /* < INIT >
                 * - electronId is initialized as -1 (meaning: invalid)
                 * - (local) oldFrameFillLvl set equal to (shared) newFrameFillLvl for each thread
                 * --> each thread remembers the old "counter"
                 * - then sync
                 */
                electronId = -1;
                oldFrameFillLvl = newFrameFillLvl;
                __syncthreads();
                /* < CHECK & ADD >
                 * - if a thread wants to create electrons in each cycle it can do that only once
                 * and before that it atomically adds to the shared counter and uses the current
                 * value as electronId in the new frame
                 * - then sync
                 */
                if (newMacroElectrons > 0)
                    electronId = nvidia::atomicAllInc(&newFrameFillLvl);

                __syncthreads();
                /* < EXIT? >
                 * - if the counter hasn't changed all threads break out of the loop */
                if (oldFrameFillLvl == newFrameFillLvl)
                    break;

                __syncthreads();
                /* < FIRST NEW FRAME >
                 * - if there is no frame, yet, the master will create a new target electron frame
                 * and attach it to the back of the frame list
                 * - sync all threads again for them to know which frame to use
                 */
                if (linearThreadIdx == 0)
                {
                    if (!electronFrame.isValid())
                    {
                        electronFrame = electronBox.getEmptyFrame();
                        electronBox.setAsLastFrame(electronFrame, block);
                    }
                }
                __syncthreads();
                /* < CREATE 1 >
                 * - all electrons fitting into the current frame are created there
                 * - internal ionization counter is decremented by 1
                 * - sync
                 */
                if ((0 <= electronId) && (electronId < maxParticlesInFrame))
                {
                    /* each thread makes the attributes of its ion accessible */
                    auto parentIon = (ionFrame[linearThreadIdx]);
                    /* each thread initializes an electron if one should be created */
                    auto targetElectronFull = (electronFrame[electronId]);

                    /* create an electron in the new electron frame:
                     * - see particles/ionization/ionizationMethods.hpp
                     */
                    WriteElectronIntoFrame writeElectron;
                    writeElectron(parentIon,targetElectronFull);

                    newMacroElectrons -= 1;
                }
                __syncthreads();
                /* < SECOND NEW FRAME >
                 * - if the shared counter is larger than the frame size a new electron frame is reserved
                 * and attached to the back of the frame list
                 * - then the shared counter is set back by one frame size
                 * - sync so that every thread knows about the new frame
                 */
                if (linearThreadIdx == 0)
                {
                    if (newFrameFillLvl >= maxParticlesInFrame)
                    {
                        electronFrame = electronBox.getEmptyFrame();
                        electronBox.setAsLastFrame(electronFrame, block);
                        newFrameFillLvl -= maxParticlesInFrame;
                    }
                }
                __syncthreads();
                /* < CREATE 2 >
                 * - if the EID is larger than the frame size
                 *      - the EID is set back by one frame size
                 *      - the thread writes an electron to the new frame
                 *      - the internal counter is decremented by 1
                 */
                if (electronId >= maxParticlesInFrame)
                {
                    electronId -= maxParticlesInFrame;

                    /* each thread makes the attributes of its ion accessible */
                    auto parentIon = ((*ionFrame)[linearThreadIdx]);
                    /* each thread initializes an electron if one should be produced */
                    auto targetElectronFull = (electronFrame[electronId]);

                    /* create an electron in the new electron frame:
                     * - see particles/ionization/ionizationMethods.hpp
                     */
                    WriteElectronIntoFrame writeElectron;
                    writeElectron(parentIon,targetElectronFull);

                    newMacroElectrons -= 1;
                }
                __syncthreads();
            }
            __syncthreads();

            if (linearThreadIdx == 0)
            {
                ionFrame = ionBox.getPreviousFrame(ionFrame);
                maxParticlesInFrame = PMacc::math::CT::volume<SuperCellSize>::type::value;
            }
            __syncthreads();
        }
    } // void kernelIonizeParticles