/* \todo not working if we have no elements on stack*/ HDINLINE VALUE &pop() { #if !defined(__CUDA_ARCH__) // Host code path TYPE old_addr = --(*currentSize); #else TYPE old_addr = atomicSub(currentSize, 1) - 1; #endif return (*this)[old_addr]; }
__global__ void HIP_FUNCTION(testKernel,int *g_odata) { // access thread id const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; // Test various atomic instructions // Arithmetic atomic instructions // Atomic addition atomicAdd(&g_odata[0], 10); // Atomic subtraction (final should be 0) atomicSub(&g_odata[1], 10); // Atomic exchange atomicExch(&g_odata[2], tid); // Atomic maximum atomicMax(&g_odata[3], tid); // Atomic minimum atomicMin(&g_odata[4], tid); // Atomic increment (modulo 17+1) //atomicInc((unsigned int *)&g_odata[5], 17); atomicInc((unsigned int *)&g_odata[5]); // Atomic decrement // atomicDec((unsigned int *)&g_odata[6], 137); atomicDec((unsigned int *)&g_odata[6]); // Atomic compare-and-swap atomicCAS(&g_odata[7], tid-1, tid); // Bitwise atomic instructions // Atomic AND atomicAnd(&g_odata[8], 2*tid+7); // Atomic OR atomicOr(&g_odata[9], 1 << tid); // Atomic XOR atomicXor(&g_odata[10], tid); }
__host__ __device__ typename enable_if< sizeof(Integer64) == 8, Integer64 >::type atomic_fetch_sub(Integer64 *x, Integer64 y) { #if defined(__CUDA_ARCH__) return atomicSub(x, y); #elif defined(__GNUC__) return __atomic_fetch_sub(x, y, __ATOMIC_SEQ_CST); #elif defined(_MSC_VER) return InterlockedExchangeAdd64(x, -y); #elif defined(__clang__) return __c11_atomic_fetch_sub(x, y) #else #error "No atomic_fetch_sub implementation." #endif }
/** * Removes count elements from the stack in an atomic operation. * * \todo This method unse int32_t and limits the element count to INT_MAX * * @param count number of elements to pop from stack * @return a TileDataBox of type VALUE with count elements */ HDINLINE TileDataBox<VALUE> popN(TYPE count) { #if !defined(__CUDA_ARCH__) // Host code path int32_t old_addr = (*currentSize); (*currentSize) -= count; #else int32_t old_addr = (int32_t) atomicSub((int32_t*) currentSize, (int32_t) count); #endif if (old_addr <= 0) { *currentSize = 0; return TileDataBox<VALUE > (this->fixedPointer, DataSpace<DIM1 > (0), 0); } if (old_addr < (int32_t) count) { *currentSize = 0; return TileDataBox<VALUE > (this->fixedPointer, DataSpace<DIM1 > (0), old_addr); } return TileDataBox<VALUE > (this->fixedPointer, DataSpace<DIM1 > (old_addr - count), count); }
__inline__ __device__ unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val ) { return atomicSub((unsigned int*)dest,val); }
__inline__ __device__ int atomic_fetch_sub( volatile int * const dest , const int val ) { return atomicSub((int*)dest,val); }