cudaError_t cudaMalloc(void **devPtr, size_t size) { static cudaError_t (*nv_cudaMalloc)(void **, size_t) = NULL; cudaError_t ret; struct timeval t; if(!nv_cudaMalloc) { nv_cudaMalloc = dlsym(RTLD_NEXT, "cudaMalloc"); //nv_cudaMalloc = dlsym(RTLD_NEXT, "cudaMalloc_v2"); if(!nv_cudaMalloc) { fprintf(stderr, "failed to find symbol cudaMalloc: %s\n", dlerror()); show_stackframe(); return cudaErrorSharedObjectSymbolNotFound; } } gettimeofday(&t, NULL); printf("[gvm] %lf intercepting cudaMalloc at %lx\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)devPtr); do { ret = nv_cudaMalloc(devPtr, size); } while (ret != cudaSuccess); gettimeofday(&t, NULL); printf("[gvm] %lf intercepted cudaMalloc( %lx %ld ) = %d\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)(*devPtr), size, (int)ret); return ret; }
// GMM-specific: allowing passing dptr array hints. GMM_EXPORT cudaError_t cudaMallocEx(void **devPtr, size_t size, int flags) { if (initialized) return gmm_cudaMalloc(devPtr, size, flags); else { gprint(WARN, "cudaMallocEx called outside GMM\n"); return nv_cudaMalloc(devPtr, size); } }
// GMM's exported cudaMalloc.
//
// When GMM is initialized the allocation is served by gmm_cudaMalloc with
// flags set to 0. Otherwise a warning is printed and the request is passed
// to nv_cudaMalloc unchanged.
GMM_EXPORT cudaError_t cudaMalloc(void **devPtr, size_t size)
{
    if (initialized)
        return gmm_cudaMalloc(devPtr, size, 0);

    // TODO: We may need to remember the device memory regions allocated
    // before GMM was initialized, so that when they are later used in
    // cudaMemcpy or other functions we can treat them specially.
    gprint(WARN, "cudaMalloc called outside GMM\n");
    return nv_cudaMalloc(devPtr, size);
}