__device__ __forceinline__ static void max(R* ptr, val_type val) { atomicMax(ptr, val.x); atomicMax(ptr + 1, val.y); atomicMax(ptr + 2, val.z); atomicMax(ptr + 3, val.w); }
EPP_DEVICE void getRequiredSNSGSize( unsigned int *output, SliceIndex *minmax, unsigned int index ) { atomicMax( output, minmax[ index ].offset ); }
EPP_DEVICE void detectMinMax( unsigned int x_boxel_count, unsigned int y_boxel_count, unsigned int z_boxel_count, SliceIndex *minmax, float boxel_size, const float3 &_position ) { int3 position_in_grid = emath::vector_cast< int3 >( _position / boxel_size ); position_in_grid.x = static_cast< unsigned int >( position_in_grid.x ) % x_boxel_count; position_in_grid.y = static_cast< unsigned int >( position_in_grid.y ) % y_boxel_count; position_in_grid.z = static_cast< unsigned int >( position_in_grid.z ) % z_boxel_count; atomicMin( &( minmax[ position_in_grid.z ].x_min ), position_in_grid.x ); atomicMax( &( minmax[ position_in_grid.z ].x_max ), position_in_grid.x ); atomicMin( &( minmax[ position_in_grid.z ].y_min ), position_in_grid.y ); atomicMax( &( minmax[ position_in_grid.z ].y_max ), position_in_grid.y ); }
__device__ void stop() { if(intervall >= 0 && intervall < numberOfIntervalls) { unsigned long long diff = clock64() - currentTime; __syncthreads(); atomicAdd(&counter[intervall].avg, diff); atomicMin(&counter[intervall].min, diff); atomicMax(&counter[intervall].max, diff); atomicAdd(&counter[intervall].iter, 1); intervall++; __syncthreads(); currentTime = clock64(); } }
__device__ void reduceGrid(work_type* result, int tid) { __shared__ work_type sminval[BLOCK_SIZE]; __shared__ work_type smaxval[BLOCK_SIZE]; minimum<work_type> minOp; maximum<work_type> maxOp; blockReduce<BLOCK_SIZE>(smem_tuple(sminval, smaxval), tie(mymin, mymax), tid, make_tuple(minOp, maxOp)); if (tid == 0) { atomicMin(result, mymin); atomicMax(result + 1, mymax); } }
__global__ void HIP_FUNCTION(testKernel,int *g_odata) { // access thread id const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; // Test various atomic instructions // Arithmetic atomic instructions // Atomic addition atomicAdd(&g_odata[0], 10); // Atomic subtraction (final should be 0) atomicSub(&g_odata[1], 10); // Atomic exchange atomicExch(&g_odata[2], tid); // Atomic maximum atomicMax(&g_odata[3], tid); // Atomic minimum atomicMin(&g_odata[4], tid); // Atomic increment (modulo 17+1) //atomicInc((unsigned int *)&g_odata[5], 17); atomicInc((unsigned int *)&g_odata[5]); // Atomic decrement // atomicDec((unsigned int *)&g_odata[6], 137); atomicDec((unsigned int *)&g_odata[6]); // Atomic compare-and-swap atomicCAS(&g_odata[7], tid-1, tid); // Bitwise atomic instructions // Atomic AND atomicAnd(&g_odata[8], 2*tid+7); // Atomic OR atomicOr(&g_odata[9], 1 << tid); // Atomic XOR atomicXor(&g_odata[10], tid); }
__device__ __forceinline__ static void atomic(T* result, T myval) { atomicMax(result, myval); }
__device__ __forceinline__ static void max(R* ptr, R val) { atomicMax(ptr, val); }