Example #1
0
/**
 * Exercises one atomic operation per slot of g_odata.
 *
 * Every thread issues the same sequence of atomics on the fixed slots
 * g_odata[0] .. g_odata[10], so the buffer must hold at least 11 ints.
 * Slot assignment:
 *   [0] add, [1] sub, [2] exch, [3] max, [4] min, [5] inc, [6] dec,
 *   [7] compare-and-swap, [8] and, [9] or, [10] xor.
 *
 * The thread index is computed from the x dimension only.
 *
 * @param g_odata  buffer updated in place by the atomic operations
 */
__global__ void HIP_FUNCTION(testKernel,int *g_odata)
{
    // Flat thread id along x: block offset plus the thread's index in its block.
    const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;

    // ---- Arithmetic atomic instructions ----

    // Atomic addition: every thread adds 10 to slot 0.
    atomicAdd(&g_odata[0], 10);

    // Atomic subtraction: every thread subtracts 10 from slot 1.
    // (Original note: "final should be 0" -- that depends on the host-side
    // initial value, which is not visible here.)
    atomicSub(&g_odata[1], 10);

    // Atomic exchange: slot 2 ends up holding the id of one of the threads.
    atomicExch(&g_odata[2], tid);

    // Atomic maximum of slot 3 and this thread's id.
    atomicMax(&g_odata[3], tid);

    // Atomic minimum of slot 4 and this thread's id.
    atomicMin(&g_odata[4], tid);

    // Atomic increment. The two-argument form (wrap limit 17) is kept below
    // for reference; the active call passes no limit.
    // NOTE(review): confirm the wrap behaviour of the single-argument overload.
    //atomicInc((unsigned int *)&g_odata[5], 17);
    atomicInc((unsigned int *)&g_odata[5]);

    // Atomic decrement. As above, the two-argument form (limit 137) is kept
    // for reference only.
    // atomicDec((unsigned int *)&g_odata[6], 137);
    atomicDec((unsigned int *)&g_odata[6]);

    // Atomic compare-and-swap: store tid into slot 7 only if it currently
    // holds tid-1. For tid == 0 the unsigned subtraction wraps before being
    // converted to the int compare value.
    atomicCAS(&g_odata[7], tid-1, tid);

    // ---- Bitwise atomic instructions ----

    // Atomic AND with an odd, thread-dependent mask.
    atomicAnd(&g_odata[8], 2*tid+7);

    // Atomic OR: each thread sets one bit of slot 9. The shift count is
    // reduced modulo 32 and the shift is done on an unsigned value, because
    // shifting a 32-bit int by 32 or more (or into the sign bit) is undefined
    // in C++ and must not be left to whatever the hardware happens to do.
    atomicOr(&g_odata[9], static_cast<int>(1u << (tid & 31u)));

    // Atomic XOR of slot 10 with this thread's id.
    atomicXor(&g_odata[10], tid);
}
Example #2
0
// Atomically replaces *x with (*x ^ y) and returns the value *x held
// immediately before the operation.
//
// Only participates when sizeof(Integer64) == 8 (see the enable_if below).
// The implementation is selected at preprocessing time:
//   - CUDA device code  : atomicXor
//   - GCC-compatible    : __atomic_fetch_xor, sequentially consistent
//   - MSVC              : InterlockedXor64
//   - other clang modes : __atomic_fetch_xor, sequentially consistent
// Any other toolchain is rejected with a compile-time #error.
//
// NOTE(review): Integer64 and enable_if are declared outside this snippet.
// Each intrinsic accepts specific 64-bit integer types; confirm that the
// Integer64 types used by callers match the branch that gets compiled.
__host__ __device__
typename enable_if<
  sizeof(Integer64) == 8,
  Integer64
>::type
atomic_fetch_xor(Integer64 *x, Integer64 y)
{
#if defined(__CUDA_ARCH__)
  return atomicXor(x, y);
#elif defined(__GNUC__)
  return __atomic_fetch_xor(x, y, __ATOMIC_SEQ_CST);
#elif defined(_MSC_VER)
  // Must be the XOR intrinsic; the OR variant would silently compute *x | y.
  return InterlockedXor64(x, y);
#elif defined(__clang__)
  // clang provides the GNU-style __atomic builtins, which operate on plain
  // (non-_Atomic) pointers and take an explicit memory order.
  return __atomic_fetch_xor(x, y, __ATOMIC_SEQ_CST);
#else
#error "No atomic_fetch_xor implementation."
#endif
}