// Rootbeer monitor release; the $$...$$ markers are placeholders the code
// generator expands into __device__ / __global__ qualifiers.
$$__device__$$ void edu_syr_pcpratts_exitMonitorMem($$__global$$ char * gc_info,
                                                    char * mem, int old){
  // old == -1 means this thread's acquiring CAS saw the free value and thus
  // owns the monitor: flush its writes, then mark the monitor free again.
  if(old == -1){
    __threadfence();
    atomicExch((int *) mem, -1);
  }
}
template< typename T >
__inline__ __device__
void atomic_assign( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  // (void) __ullAtomicExch( (int*) dest , *((int*)&val) );
  (void) atomicExch( ((int*)dest) , *((int*)&val) );
}
template< typename T >
__inline__ __device__
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) );
  int tmp = atomicExch( ((int*)dest) , *((int*)&val) );
  return *((T*)&tmp);
}
$$__device__$$ void edu_syr_pcpratts_exitMonitorRef($$__global$$ char * gc_info,
                                                    int thisref, int old){
  char * mem = edu_syr_pcpratts_gc_deref(gc_info, thisref);
  mem += 12;   // skip past the object header to the monitor word
  if(old == -1){
    __threadfence();
    atomicExch((int *) mem, -1);
  }
}
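// The acquiring side is not part of this listing. Below is a minimal sketch
// of the lock-enter loop such exit routines pair with; enterMonitor, lock,
// and tid are hypothetical names (not Rootbeer's), and -1 is the free value
// implied by the exits above.
__device__ void enterMonitor(int * lock, int tid)
{
  // spin until the CAS observes -1 ("free") and installs this thread's id;
  // the CAS result is the `old` value later handed to exitMonitor*
  while (atomicCAS(lock, -1, tid) != -1) { }
  __threadfence();
}
// Note: a naive spin like this can livelock when threads of the same warp
// contend for one lock on pre-Volta hardware.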
template< typename T >
__inline__ __device__
void atomic_assign( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int)
                                  , const T & >::type val )
{
  typedef unsigned long long int type ;
  // (void) __ullAtomicExch( (type*) dest , *((type*)&val) );
  (void) atomicExch( ((type*)dest) , *((type*)&val) );
}
template< typename T >
__inline__ __device__
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) &&
                                    sizeof(T) == sizeof(unsigned long long int)
                                  , const T & >::type val )
{
  typedef unsigned long long int type ;
  // type tmp = __ullAtomicExch( (type*) dest , *((type*)&val) );
  type tmp = atomicExch( ((type*)dest) , *((type*)&val) );
  return *((T*)&tmp);
}
template< typename T >
__inline__ __device__
T atomic_exchange( volatile T * const dest ,
  typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val )
{
  // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) );
#if defined( KOKKOS_ENABLE_RFO_PREFETCH )
  _mm_prefetch( (const char*) dest, _MM_HINT_ET0 );
#endif
  int tmp = atomicExch( ((int*)dest) , *((int*)&val) );
  return *((T*)&tmp);
}
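// These type-punning overloads let any 4- or 8-byte trivially copyable type
// ride on the integer atomicExch. A hedged sketch (the kernel name and
// buffers are illustrative, not from Kokkos): a float resolves to the
// sizeof(T) == sizeof(int) overload above.
__global__ void swap_float(float * slot, float * out)
{
  // the float's bits travel through atomicExch as an int and come back out
  out[threadIdx.x] = atomic_exchange(slot, 1.0f);
}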
__global__ void HIP_FUNCTION(testKernel, int *g_odata)
{
    // access thread id
    const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;

    // Test various atomic instructions

    // Arithmetic atomic instructions

    // Atomic addition
    atomicAdd(&g_odata[0], 10);

    // Atomic subtraction (final should be 0)
    atomicSub(&g_odata[1], 10);

    // Atomic exchange
    atomicExch(&g_odata[2], tid);

    // Atomic maximum
    atomicMax(&g_odata[3], tid);

    // Atomic minimum
    atomicMin(&g_odata[4], tid);

    // Atomic increment (modulo 17+1)
    //atomicInc((unsigned int *)&g_odata[5], 17);
    atomicInc((unsigned int *)&g_odata[5]);

    // Atomic decrement
    // atomicDec((unsigned int *)&g_odata[6], 137);
    atomicDec((unsigned int *)&g_odata[6]);

    // Atomic compare-and-swap
    atomicCAS(&g_odata[7], tid - 1, tid);

    // Bitwise atomic instructions

    // Atomic AND
    atomicAnd(&g_odata[8], 2 * tid + 7);

    // Atomic OR
    atomicOr(&g_odata[9], 1 << tid);

    // Atomic XOR
    atomicXor(&g_odata[10], tid);
}
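// For context, a minimal host-side driver for this kernel, assuming
// HIP_FUNCTION expands to a plain __global__ function named testKernel;
// the sizes and the printed slot are illustrative.
#include <hip/hip_runtime.h>
#include <cstdio>

int main()
{
    const int numThreads = 32, numValues = 11;
    int h_odata[numValues] = {0};
    int *d_odata = nullptr;

    hipMalloc(&d_odata, numValues * sizeof(int));
    hipMemcpy(d_odata, h_odata, numValues * sizeof(int), hipMemcpyHostToDevice);

    // one block of 32 threads exercises every atomic in the kernel
    hipLaunchKernelGGL(testKernel, dim3(1), dim3(numThreads), 0, 0, d_odata);

    hipMemcpy(h_odata, d_odata, numValues * sizeof(int), hipMemcpyDeviceToHost);
    printf("atomicExch slot: %d\n", h_odata[2]);   // the last tid to win the exchange
    hipFree(d_odata);
    return 0;
}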
template< typename Integer64 >
__host__ __device__
typename enable_if< sizeof(Integer64) == 8 >::type
atomic_store(Integer64 *x, Integer64 y)
{
#if defined(__CUDA_ARCH__)
  // CUDA's 64-bit atomicExch is only overloaded for unsigned long long,
  // so reinterpret the (size-checked) operands accordingly.
  atomicExch(reinterpret_cast<unsigned long long int*>(x),
             static_cast<unsigned long long int>(y));
#elif defined(__GNUC__) || defined(__clang__)
  // clang also defines __GNUC__ and supports the GCC atomic builtins, so
  // both compilers take this branch.
  __atomic_store_n(x, y, __ATOMIC_SEQ_CST);
#elif defined(_MSC_VER)
  InterlockedExchange64(reinterpret_cast<volatile LONG64*>(x), y);
#else
#error "No atomic_store_n implementation."
#endif
}
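// A quick exercise of the device path (the kernel and buffer names are
// hypothetical): every thread stores the same 64-bit value, and the
// atomicExch underneath guarantees the write is never torn.
__global__ void store_flag(long long * flag)
{
    atomic_store(flag, 42LL);
}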
__inline__ __device__
unsigned long long int atomic_exchange( volatile unsigned long long int * const dest ,
                                        const unsigned long long int val )
{
  // return __ullAtomicExch( (unsigned long long*) dest , val );
  return atomicExch( (unsigned long long*) dest , val );
}
__inline__ __device__
int atomic_exchange( volatile int * const dest , const int val )
{
  // return __iAtomicExch( (int*) dest , val );
  return atomicExch( (int*) dest , val );
}
DINLINE bool isFirstParticle(T_Acc const & acc)
{
    // the thread that flips firstParticleFlag from 0 to 1 sees the old
    // value 0 and is therefore the unique "first" particle
    return atomicExch( &this->firstParticleFlag, 1 ) == 0;
}
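// Typical use of such a flag (everything except isFirstParticle is a
// hypothetical name): exactly one thread wins the 0 -> 1 exchange and
// performs the one-time setup; all others skip it.
if (particle.isFirstParticle(acc))
{
    initCellOnce(acc, particle);   // hypothetical one-time initialization
}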