__global__ void CountMakroParticle(ParBox parBox, CounterBox counterBox, Mapping mapper)
{

    typedef MappingDesc::SuperCellSize SuperCellSize;
    typedef typename ParBox::FrameType FrameType;

    const DataSpace<simDim> block(mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx)));
    /* counterBox has no guarding supercells*/
    const DataSpace<simDim> counterCell = block - mapper.getGuardingSuperCells();

    const DataSpace<simDim > threadIndex(threadIdx);
    const int linearThreadIdx = DataSpaceOperations<simDim>::template map<SuperCellSize > (threadIndex);

    __shared__ uint64_cu counterValue;
    __shared__ FrameType *frame;
    __shared__ bool isValid;

    if (linearThreadIdx == 0)
    {
        counterValue = 0;
        frame = &(parBox.getLastFrame(block, isValid));
        if (!isValid)
        {
            counterBox(counterCell) = counterValue;
        }
    }
    __syncthreads();
    if (!isValid)
        return; //end kernel if we have no frames

    bool isParticle = (*frame)[linearThreadIdx][multiMask_];

    while (isValid)
    {
        if (isParticle)
        {
            atomicAdd(&counterValue, static_cast<uint64_cu> (1LU));
        }
        __syncthreads();
        if (linearThreadIdx == 0)
        {
            frame = &(parBox.getPreviousFrame(*frame, isValid));
        }
        isParticle = true;
        __syncthreads();
    }

    if (linearThreadIdx == 0)
        counterBox(counterCell) = counterValue;
}
__global__ void kernelAddOneParticle(ParBox pb,
                                     DataSpace<simDim> superCell, DataSpace<simDim> parLocalCell)
{
    typedef typename ParBox::FrameType FRAME;

    FRAME *frame;

    int linearIdx = DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (parLocalCell);

    float_X parWeighting = NUM_EL_PER_PARTICLE;

    frame = &(pb.getEmptyFrame());
    pb.setAsLastFrame(*frame, superCell);




    // many particle loop:
    for (unsigned i = 0; i < 1; ++i)
    {
        PMACC_AUTO(par,(*frame)[i]);
        floatD_X pos;
        for(int i=0; i<simDim; ++i)
          pos[i] = 0.5;

        const float_X GAMMA0_X = 1.0f / sqrtf(1.0f - float_X(BETA0_X * BETA0_X));
        const float_X GAMMA0_Y = 1.0f / sqrtf(1.0f - float_X(BETA0_Y * BETA0_Y));
        const float_X GAMMA0_Z = 1.0f / sqrtf(1.0f - float_X(BETA0_Z * BETA0_Z));
        float3_X mom = float3_X(
                                GAMMA0_X * getMass<FRAME>(parWeighting) * float_X(BETA0_X) * SPEED_OF_LIGHT,
                                GAMMA0_Y * getMass<FRAME>(parWeighting) * float_X(BETA0_Y) * SPEED_OF_LIGHT,
                                GAMMA0_Z * getMass<FRAME>(parWeighting) * float_X(BETA0_Z) * SPEED_OF_LIGHT
                                );

        par[position_] = pos;
        par[momentum_] = mom;
        par[multiMask_] = 1;
        par[localCellIdx_] = linearIdx;
        par[weighting_] = parWeighting;

#if(ENABLE_RADIATION == 1)
        par[momentumPrev1_] = float3_X(0.f, 0.f, 0.f);
#if(RAD_MARK_PARTICLE>1) || (RAD_ACTIVATE_GAMMA_FILTER!=0)
        /*this code tree is only passed if we not select any particle*/
        par[radiationFlag_] = true;
#endif
#endif
    }
}
__global__ void
kernelParticleDensity(ParBox pb,
                      DataBox<PitchedBox<Type_, DIM2> > image,
                      DataSpace<DIM2> transpose,
                      int slice,
                      uint32_t globalOffset,
                      uint32_t sliceDim,
                      Mapping mapper)
{
    typedef typename ParBox::FrameType FRAME;
    typedef typename MappingDesc::SuperCellSize Block;
    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/

    bool isImageThread = false;

    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace());


    int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x;


    if (localId == 0)
        isValid = false;
    __syncthreads();

    //\todo: guard size should not be set to (fixed) 1 here
    const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx


    uint32_t globalCell = realCell[sliceDim] + globalOffset;

    if (globalCell == slice)
    {
        isValid = true;
        isImageThread = true;
    }
    __syncthreads();

    if (!isValid)
        return;

    /*index in image*/
    DataSpace<DIM2> imageCell(
                              realCell[transpose.x()],
                              realCell[transpose.y()]);


    // counter is always DIM2
    typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem;
    extern __shared__ float_X shBlock[];
    __syncthreads(); /*wait that all shared memory is initialised*/

    const DataSpace<simDim> blockSize(blockDim);
    SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock,
                                              DataSpace<DIM2 > (),
                                              blockSize[transpose.x()] * sizeof (float_X)));

    if (isImageThread)
    {
        counter(localCell) = float_X(0.0);
    }


    if (localId == 0)
    {
        frame = &(pb.getFirstFrame(block, isValid));
    }
    __syncthreads();

    while (isValid) //move over all Frames
    {
        PMACC_AUTO(particle,(*frame)[localId]);
        if (particle[multiMask_] == 1)
        {
            int cellIdx = particle[localCellIdx_];
            // we only draw the first slice of cells in the super cell (z == 0)
            const DataSpace<DIM3> particleCellId(DataSpaceOperations<DIM3>::template map<Block > (cellIdx));
            uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim];
            if (globalParticleCell == slice)
            {
                const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]);
                atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE);
            }
        }
        __syncthreads();

        if (localId == 0)
        {
            frame = &(pb.getNextFrame(*frame, isValid));
        }
        __syncthreads();
    }


    if (isImageThread)
    {
        image(imageCell) = (Type_) counter(localCell);
    }
}
__global__ void kernelAddOneParticle(ParBox pb,
                                     DataSpace<simDim> superCell, DataSpace<simDim> parLocalCell)
{
    typedef typename ParBox::FrameType FRAME;

    FRAME *frame;

    int linearIdx = DataSpaceOperations<simDim>::template map<MappingDesc::SuperCellSize > (parLocalCell);

    float_X parWeighting = NUM_EL_PER_PARTICLE;

    frame = &(pb.getEmptyFrame());
    pb.setAsLastFrame(*frame, superCell);




    // many particle loop:
    for (unsigned i = 0; i < 1; ++i)
    {
        PMACC_AUTO(par, (*frame)[i]);

        /** we now initialize all attributes of the new particle to their default values
         *   some attributes, such as the position, localCellIdx, weighting or the
         *   multiMask (\see AttrToIgnore) of the particle will be set individually
         *   in the following lines since they are already known at this point.
         */
        {
            typedef typename ParBox::FrameType FrameType;
            typedef typename FrameType::ValueTypeSeq ParticleAttrList;
            typedef bmpl::vector4<position<>, multiMask, localCellIdx, weighting> AttrToIgnore;
            typedef typename ResolveAndRemoveFromSeq<ParticleAttrList, AttrToIgnore>::type ParticleCleanedAttrList;

            algorithms::forEach::ForEach<ParticleCleanedAttrList,
                SetAttributeToDefault<bmpl::_1> > setToDefault;
            setToDefault(forward(par));
        }

        float3_X pos = float3_X(0.5, 0.5, 0.5);

        const float_X GAMMA0 = (float_X) (1.0 / sqrt(1.0 - (BETA0_X * BETA0_X + BETA0_Y * BETA0_Y + BETA0_Z * BETA0_Z)));
        float3_X mom = float3_X(
                                     GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_X) * SPEED_OF_LIGHT,
                                     GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_Y) * SPEED_OF_LIGHT,
                                     GAMMA0 * attribute::getMass(parWeighting,par) * float_X(BETA0_Z) * SPEED_OF_LIGHT
                                );


        par[position_] = pos;
        par[momentum_] = mom;
        par[multiMask_] = 1;
        par[localCellIdx_] = linearIdx;
        par[weighting_] = parWeighting;

#if(ENABLE_RADIATION == 1)
#if(RAD_MARK_PARTICLE>1) || (RAD_ACTIVATE_GAMMA_FILTER!=0)
        par[radiationFlag_] = true;
#endif
#endif
    }
}
Example #5
0
__global__ void
kernelPaintParticles3D(ParBox pb,
                       DataBox<PitchedBox<float3_X, DIM2> > image,
                       DataSpace<DIM2> transpose,
                       int slice,
                       uint32_t globalOffset,
                       uint32_t sliceDim,
                       Mapping mapper)
{
    typedef typename ParBox::FrameType FRAME;
    typedef typename MappingDesc::SuperCellSize Block;
    __shared__ FRAME *frame;
    __shared__ bool isValid;
    __syncthreads(); /*wait that all shared memory is initialised*/
    bool isImageThread = false;

    const DataSpace<simDim> threadId(threadIdx);
    const DataSpace<DIM2> localCell(threadId[transpose.x()], threadId[transpose.y()]);
    const DataSpace<simDim> block = mapper.getSuperCellIndex(DataSpace<simDim > (blockIdx));
    const DataSpace<simDim> blockOffset((block - 1) * Block::getDataSpace());


    int localId = threadIdx.z * Block::x * Block::y + threadIdx.y * Block::x + threadIdx.x;


    if (localId == 0)
        isValid = false;
    __syncthreads();

    //\todo: guard size should not be set to (fixed) 1 here
    const DataSpace<simDim> realCell(blockOffset + threadId); //delete guard from cell idx

#if(SIMDIM==DIM3)
    uint32_t globalCell = realCell[sliceDim] + globalOffset;

    if (globalCell == slice)
#endif
    {
        atomicExch((int*) &isValid, 1); /*WAW Error in cuda-memcheck racecheck*/
        isImageThread = true;
    }
    __syncthreads();

    if (!isValid)
        return;

    /*index in image*/
    DataSpace<DIM2> imageCell(
                              realCell[transpose.x()],
                              realCell[transpose.y()]);


    // counter is always DIM2
    typedef DataBox < PitchedBox< float_X, DIM2 > > SharedMem;
    extern __shared__ float_X shBlock[];
    __syncthreads(); /*wait that all shared memory is initialised*/

    const DataSpace<simDim> blockSize(blockDim);
    SharedMem counter(PitchedBox<float_X, DIM2 > ((float_X*) shBlock,
                                                  DataSpace<DIM2 > (),
                                                  blockSize[transpose.x()] * sizeof (float_X)));

    if (isImageThread)
    {
        counter(localCell) = float_X(0.0);
    }


    if (localId == 0)
    {
        frame = &(pb.getFirstFrame(block, isValid));
    }
    __syncthreads();

    while (isValid) //move over all Frames
    {
        PMACC_AUTO(particle,(*frame)[localId]);
        if (particle[multiMask_] == 1)
        {
            int cellIdx = particle[localCellIdx_];
            // we only draw the first slice of cells in the super cell (z == 0)
            const DataSpace<simDim> particleCellId(DataSpaceOperations<simDim>::template map<Block > (cellIdx));
#if(SIMDIM==DIM3)
            uint32_t globalParticleCell = particleCellId[sliceDim] + globalOffset + blockOffset[sliceDim];
            if (globalParticleCell == slice)
#endif
            {
                const DataSpace<DIM2> reducedCell(particleCellId[transpose.x()], particleCellId[transpose.y()]);
                atomicAddWrapper(&(counter(reducedCell)), particle[weighting_] / NUM_EL_PER_PARTICLE);
            }
        }
        __syncthreads();

        if (localId == 0)
        {
            frame = &(pb.getNextFrame(*frame, isValid));
        }
        __syncthreads();
    }


    if (isImageThread)
    {
        /** Note: normally, we would multiply by NUM_EL_PER_PARTICLE again.
         *  BUT: since we are interested in a simple value between 0 and 1,
         *       we stay with this number (normalized to the order of macro
         *       particles) and devide by the number of typical macro particles
         *       per cell
         */
        float_X value = counter(localCell)
            / float_X(particleInit::NUM_PARTICLES_PER_CELL); // * NUM_EL_PER_PARTICLE;
        if (value > 1.0) value = 1.0;

        //image(imageCell).x() = value;
        visPreview::preParticleDensCol::addRGB(image(imageCell),
                                               value,
                                               visPreview::preParticleDens_opacity);

        // cut to [0, 1]
        if (image(imageCell).x() < float_X(0.0)) image(imageCell).x() = float_X(0.0);
        if (image(imageCell).x() > float_X(1.0)) image(imageCell).x() = float_X(1.0);
        if (image(imageCell).y() < float_X(0.0)) image(imageCell).y() = float_X(0.0);
        if (image(imageCell).y() > float_X(1.0)) image(imageCell).y() = float_X(1.0);
        if (image(imageCell).z() < float_X(0.0)) image(imageCell).z() = float_X(0.0);
        if (image(imageCell).z() > float_X(1.0)) image(imageCell).z() = float_X(1.0);
    }
}