/**
 * Hand out a host buffer able to hold `elements` objects of type T.
 *
 * The request size is computed as divup(sizeof(T) * elements, 1024) * 1024,
 * i.e. a multiple of 1024 bytes. A cached buffer that is marked free and has
 * exactly that size is reused; otherwise a new buffer is obtained from malloc
 * and recorded in memory_map.
 *
 * @param elements Number of T objects requested.
 * @return Pointer to the buffer, or NULL when `elements` is 0 or when malloc
 *         still fails after cached buffers have been garbage collected.
 */
T* memAlloc(const size_t &elements)
{
    managerInit();

    size_t alloc_bytes = divup(sizeof(T) * elements, 1024) * 1024;

    if (elements == 0) {
        return NULL;
    }

    // FIXME: Add better checks for garbage collection
    // Perhaps look at total memory available as a metric
    if (memory_map.size() > MAX_BUFFERS || used_bytes >= MAX_BYTES) {
        garbageCollect();
    }

    // Reuse a cached buffer of exactly the requested size if one is free.
    for (mem_iter iter = memory_map.begin(); iter != memory_map.end(); ++iter) {
        if (iter->second.is_free && iter->second.bytes == alloc_bytes) {
            iter->second.is_free = false;
            used_bytes += alloc_bytes;
            return (T *)iter->first;
        }
    }

    T* ptr = (T *)malloc(alloc_bytes);

    // Perform garbage collection if memory can not be allocated
    if (ptr == NULL) {
        garbageCollect();
        ptr = (T *)malloc(alloc_bytes);
        if (ptr == NULL) {
            // Do not record a failed allocation: a NULL key in memory_map
            // and an inflated used_bytes would corrupt the bookkeeping.
            return NULL;
        }
    }

    mem_info info = {false, alloc_bytes};
    memory_map[ptr] = info;
    used_bytes += alloc_bytes;

    return ptr;
}
/**
 * Hand out a host buffer able to hold `elements` objects of type T.
 *
 * The request size is computed as
 * divup(sizeof(T) * elements, memory_resolution) * memory_resolution, i.e. a
 * multiple of memory_resolution. While memory_map_mutex is held, a cached
 * buffer that is free, not unlinked and of exactly that size is reused;
 * otherwise a new buffer is obtained from malloc and recorded in memory_map.
 *
 * If malloc fails, cached buffers are garbage collected and the allocation is
 * retried once before AF_ERROR is raised with AF_ERR_NO_MEM.
 *
 * @param elements Number of T objects requested.
 * @return Pointer to the buffer, or NULL when `elements` is 0.
 */
T* memAlloc(const size_t &elements)
{
    managerInit();

    size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;

    if (elements == 0) {
        return NULL;
    }

    std::lock_guard<std::mutex> lock(memory_map_mutex);

    // FIXME: Add better checks for garbage collection
    // Perhaps look at total memory available as a metric
    if (memory_map.size() > MAX_BUFFERS || used_bytes >= MAX_BYTES) {
        garbageCollect();
    }

    // Reuse a cached buffer of exactly the requested size if one is available.
    for (mem_iter iter = memory_map.begin(); iter != memory_map.end(); ++iter) {
        if (iter->second.is_free &&
            !iter->second.is_unlinked &&
            iter->second.bytes == alloc_bytes) {
            iter->second.is_free = false;
            used_bytes += alloc_bytes;
            used_buffers++;
            return (T *)iter->first;
        }
    }

    T* ptr = (T *)malloc(alloc_bytes);

    // Perform garbage collection if memory can not be allocated
    if (ptr == NULL) {
        // garbageCollect() is already invoked above with the lock held, so
        // calling it here under the same lock follows the existing pattern.
        garbageCollect();
        ptr = (T *)malloc(alloc_bytes);
    }

    if (ptr == NULL) {
        AF_ERROR("Can not allocate memory", AF_ERR_NO_MEM);
    }

    mem_info info = {false, false, alloc_bytes};
    memory_map[ptr] = info;
    used_bytes += alloc_bytes;
    used_buffers++;
    total_bytes += alloc_bytes;

    return ptr;
}
/**
 * Hand out a device buffer able to hold `elements` objects of type T on the
 * device reported by getActiveDeviceId().
 *
 * The request size is a multiple of memory_resolution. A cached buffer in
 * that device's map that is free, not unlinked and of exactly the requested
 * size is reused. Otherwise cudaMalloc is called; if it fails, cached buffers
 * are garbage collected and cudaMalloc is retried under CUDA_CHECK.
 *
 * @param elements Number of T objects requested.
 * @return Pointer to the buffer, or NULL when `elements` is 0.
 */
T* memAlloc(const size_t &elements)
{
    managerInit();
    int n = getActiveDeviceId();

    T* ptr = NULL;
    size_t alloc_bytes = divup(sizeof(T) * elements, memory_resolution) * memory_resolution;

    // Nothing to allocate for an empty request.
    if (!(elements > 0)) {
        return ptr;
    }

    // FIXME: Add better checks for garbage collection
    // Perhaps look at total memory available as a metric
    if (memory_maps[n].size() >= MAX_BUFFERS || used_bytes[n] >= MAX_BYTES) {
        garbageCollect();
    }

    // Scan this device's cache for a reusable buffer of the exact size.
    for (mem_iter iter = memory_maps[n].begin(); iter != memory_maps[n].end(); ++iter) {
        mem_info cached = iter->second;

        if (!cached.is_free || cached.is_unlinked || cached.bytes != alloc_bytes) {
            continue;
        }

        iter->second.is_free = false;
        used_bytes[n] += alloc_bytes;
        used_buffers[n]++;
        return (T *)iter->first;
    }

    // Perform garbage collection if memory can not be allocated
    const bool allocated = (cudaMalloc((void **)&ptr, alloc_bytes) == cudaSuccess);
    if (!allocated) {
        garbageCollect();
        CUDA_CHECK(cudaMalloc((void **)(&ptr), alloc_bytes));
    }

    // Register the fresh buffer as in use and update the per-device counters.
    mem_info fresh = {false, false, alloc_bytes};
    memory_maps[n][ptr] = fresh;
    used_bytes[n] += alloc_bytes;
    used_buffers[n]++;
    total_bytes[n] += alloc_bytes;

    return ptr;
}
/**
 * Hand out a pinned host buffer able to hold `elements` objects of type T.
 *
 * A cached buffer in pinned_maps that is free and of exactly the requested
 * size is reused. Otherwise cudaMallocHost is called; if it fails, cached
 * pinned buffers are garbage collected and cudaMallocHost is retried under
 * CUDA_CHECK.
 *
 * @param elements Number of T objects requested.
 * @return Pointer to the buffer, or NULL when `elements` is 0.
 */
T* pinnedAlloc(const size_t &elements)
{
    managerInit();

    T* ptr = NULL;

    // Allocate the higher megabyte. Overhead of creating pinned memory is
    // more so we want more reusable memory.
    size_t alloc_bytes = divup(sizeof(T) * elements, 1048576) * 1048576;

    // Nothing to allocate for an empty request.
    if (!(elements > 0)) {
        return (T*)ptr;
    }

    // FIXME: Add better checks for garbage collection
    // Perhaps look at total memory available as a metric
    if (pinned_maps.size() >= MAX_BUFFERS || pinned_used_bytes >= MAX_BYTES) {
        pinnedGarbageCollect();
    }

    // Scan the pinned cache for a free buffer of the exact size.
    for (mem_iter iter = pinned_maps.begin(); iter != pinned_maps.end(); ++iter) {
        mem_info cached = iter->second;

        if (!cached.is_free || cached.bytes != alloc_bytes) {
            continue;
        }

        iter->second.is_free = false;
        pinned_used_bytes += alloc_bytes;
        return (T *)iter->first;
    }

    // Perform garbage collection if memory can not be allocated
    const bool allocated = (cudaMallocHost((void **)&ptr, alloc_bytes) == cudaSuccess);
    if (!allocated) {
        pinnedGarbageCollect();
        CUDA_CHECK(cudaMallocHost((void **)(&ptr), alloc_bytes));
    }

    // Register the fresh buffer as in use.
    mem_info fresh = {false, false, alloc_bytes};
    pinned_maps[ptr] = fresh;
    pinned_used_bytes += alloc_bytes;

    return (T*)ptr;
}
/**
 * Typed front end for pinnedBufferAlloc.
 *
 * Initializes the manager, converts the element count into a byte count
 * (elements * sizeof(T)) and casts the buffer returned by pinnedBufferAlloc
 * to T*.
 *
 * @param elements Number of T objects requested.
 * @return Whatever pinnedBufferAlloc returns, cast to T*.
 */
T* pinnedAlloc(const size_t &elements)
{
    managerInit();

    const size_t bytes = elements * sizeof(T);
    return (T *)pinnedBufferAlloc(bytes);
}
/**
 * Typed front end for bufferAlloc.
 *
 * Initializes the manager, converts the element count into a byte count
 * (elements * sizeof(T)) and casts the buffer returned by bufferAlloc to T*.
 *
 * @param elements Number of T objects requested.
 * @return Whatever bufferAlloc returns, cast to T*.
 */
T *memAlloc(const size_t &elements)
{
    managerInit();

    const size_t bytes = elements * sizeof(T);
    return (T *)bufferAlloc(bytes);
}
/**
    Construct a timer.

    Every construction increments manager.timer_count. Only the construction
    that finds the count at zero calls managerInit(timer), so the index passed
    then is the one used for all timers created afterwards.

    @param timer
        The hardware timer to use - valid options are 0, 1 and 2.
        0 is the default.
*/
Timer::Timer(int timer)
{
    // Post-increment: test the count as it was before this timer was added.
    const bool is_first_timer = !manager.timer_count++;

    if (is_first_timer) {
        managerInit(timer);
    }
}