void* realloc(void *ptr, size_t size) throw () { initTrueFuncs(); void *result = true_realloc (ptr,size); if (ptr==0 && result!=0) { atomicInc(&scidb::_mallocStats[scidb::MALLOC] , 1); } else if (ptr!=0 && result==0) { atomicInc(&scidb::_mallocStats[scidb::FREE] , 1); } return result; }
void free (void *ptr) throw () { initTrueFuncs(); true_free (ptr); if (ptr!=0) { atomicInc(&scidb::_mallocStats[scidb::FREE] , 1); } }
/** * Adds an element at the end of the buffer in an atomic operation. * * @param val element of type VALUE to add */ HDINLINE void push(VALUE val) { #if !defined(__CUDA_ARCH__) // Host code path const TYPE old_idx = (indexBox[PUSH]); old_idx >= size - 1 ? (indexBox[PUSH]) = 0 : (indexBox[PUSH]) = old_idx + 1; #else const TYPE old_idx = atomicInc(&(indexBox[PUSH]), size - 1); #endif (*this)[old_idx] = val; }
void* malloc(size_t size) throw () { initTrueFuncs(); void * result = true_malloc (size); if (result!=0) { atomicInc(&scidb::_mallocStats[scidb::MALLOC] , 1); } return result; }
void *calloc(size_t nmemb, size_t size) throw () { if (!true_calloc) { // dlsym may call calloc internally // to prevent an infinite loop just return NULL return 0; } void * result = true_calloc (nmemb, size); if (result!=0) { atomicInc(&scidb::_mallocStats[scidb::MALLOC] , 1); } return result; }
__global__ void HIP_FUNCTION(testKernel,int *g_odata) { // access thread id const unsigned int tid = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x; // Test various atomic instructions // Arithmetic atomic instructions // Atomic addition atomicAdd(&g_odata[0], 10); // Atomic subtraction (final should be 0) atomicSub(&g_odata[1], 10); // Atomic exchange atomicExch(&g_odata[2], tid); // Atomic maximum atomicMax(&g_odata[3], tid); // Atomic minimum atomicMin(&g_odata[4], tid); // Atomic increment (modulo 17+1) //atomicInc((unsigned int *)&g_odata[5], 17); atomicInc((unsigned int *)&g_odata[5]); // Atomic decrement // atomicDec((unsigned int *)&g_odata[6], 137); atomicDec((unsigned int *)&g_odata[6]); // Atomic compare-and-swap atomicCAS(&g_odata[7], tid-1, tid); // Bitwise atomic instructions // Atomic AND atomicAnd(&g_odata[8], 2*tid+7); // Atomic OR atomicOr(&g_odata[9], 1 << tid); // Atomic XOR atomicXor(&g_odata[10], tid); }
/** * Removes an element from the front of the buffer in an atomic operation. * * @return the element of type VALUE removed from the buffer */ HDINLINE VALUE &pop() { #if (__CUDA_ARCH__>=200) const TYPE push_idx = indexBox[PUSH]; // == B #endif //!\todo check if we can use atomicInc #if !defined(__CUDA_ARCH__) // Host code path const TYPE old_idx = (indexBox[POP]); old_idx >= size - 1 ? (indexBox[POP]) = 0 : (indexBox[POP]) = old_idx + 1; #else const TYPE old_idx = atomicInc(&(indexBox[POP]), size - 1); #endif #if (__CUDA_ARCH__>=200) /*old_idx == F*/ const TYPE new_idx = (old_idx + 1) % size; //==F' const bool a = (old_idx > push_idx); const bool b = (old_idx < push_idx); const bool c = (new_idx >= push_idx); const bool d = (new_idx <= push_idx); const bool e = (new_idx < old_idx); const bool f = !(e); //F'>=F const bool overflow = (b && c && f) || (e && ((a && c) || (b && d))); if (overflow) printf("Ringbuffer: memory overflow\n"); #endif return (*this)[old_idx]; }
void incref() { atomicInc(m_refcount); }