__inline__ __device__ T atomic_exchange( volatile T * const dest , typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp #if defined( KOKKOS_ENABLE_RFO_PREFETCH ) _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); #endif int done = 0; unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { if( Impl::lock_address_cuda_space( (void*) dest ) ) { return_val = *dest; *dest = val; Impl::unlock_address_cuda_space( (void*) dest ); done = 1; } } done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; }
__inline__ __device__ T atomic_compare_exchange( volatile T * const dest , const T & compare , typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) , const T >::type& val ) { T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { if( Impl::lock_address_cuda_space( (void*) dest ) ) { return_val = *dest; if( return_val == compare ) *dest = val; Impl::unlock_address_cuda_space( (void*) dest ); done = 1; } } done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; }
KOKKOS_INLINE_FUNCTION T atomic_oper_fetch( const Oper& op, volatile T * const dest , typename Kokkos::Impl::enable_if< ( sizeof(T) != 4 ) && ( sizeof(T) != 8 ) #if defined(KOKKOS_ENABLE_ASM) && defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST) && ( sizeof(T) != 16 ) #endif , const T >::type& val ) { #ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST while( !Impl::lock_address_host_space( (void*) dest ) ); T return_val = Oper::apply(*dest, val); *dest = return_val; Impl::unlock_address_host_space( (void*) dest ); return return_val; #elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA) T return_val; // This is a way to (hopefully) avoid dead lock in a warp int done = 0; unsigned int active = KOKKOS_IMPL_CUDA_BALLOT(1); unsigned int done_active = 0; while (active!=done_active) { if(!done) { if( Impl::lock_address_cuda_space( (void*) dest ) ) { return_val = Oper::apply(*dest, val); *dest = return_val; Impl::unlock_address_cuda_space( (void*) dest ); done=1; } } done_active = KOKKOS_IMPL_CUDA_BALLOT(done); } return return_val; #endif }