Пример #1
0
__device__ T blockScanInclusive(T data, volatile T* smem, uint tid)
{
    if (THREADS_NUM > WARP_SIZE)
    {
        // bottom-level inclusive warp scan
        T warpResult = warpScanInclusive(data, smem, tid);

        __syncthreads();

        // save top elements of each warp for exclusive warp scan
        // sync to wait for warp scans to complete (because s_Data is being overwritten)
        if ((tid & (WARP_SIZE - 1)) == (WARP_SIZE - 1))
        {
            smem[tid >> LOG_WARP_SIZE] = warpResult;
        }
Пример #2
0
// Derives an exclusive warp-scan value from the inclusive one.
//
// Forwards all three arguments unchanged to warpScanInclusive() and then
// subtracts this thread's own input from the value it returns.
//
//   data - this thread's input element
//   smem - scratch buffer handed straight to warpScanInclusive(); any size or
//          layout requirements are those of that helper
//          (NOTE(review): not visible here - confirm against its definition)
//   tid  - thread index handed straight to warpScanInclusive()
//
// Returns warpScanInclusive(data, smem, tid) - data.
// NOTE(review): removing the own element by subtraction presumably relies on
// the underlying scan being an additive prefix sum - confirm.
__device__ __forceinline__ T warpScanExclusive(T data, volatile T* smem, uint tid)
{
    const T inclusiveValue = warpScanInclusive(data, smem, tid);

    // Drop this thread's own contribution to turn inclusive into exclusive.
    return inclusiveValue - data;
}