/*
 * GMM entry point exported under the name cudaMemcpy.
 *
 * dst, src, count and kind are passed through unchanged to whichever
 * handler is selected:
 *   - GMM not initialized      -> warn, then nv_cudaMemcpy
 *   - cudaMemcpyHostToDevice   -> gmm_cudaMemcpyHtoD
 *   - cudaMemcpyDeviceToHost   -> gmm_cudaMemcpyDtoH
 *   - cudaMemcpyDeviceToDevice -> gmm_cudaMemcpyDtoD
 *   - any other kind           -> warn, then nv_cudaMemcpy
 *
 * Returns the status reported by the selected handler.
 *
 * NOTE(review): nv_cudaMemcpy is presumably the original CUDA runtime
 * implementation resolved elsewhere in this file -- confirm.
 */
GMM_EXPORT cudaError_t cudaMemcpy(
		void *dst,
		const void *src,
		size_t count,
		enum cudaMemcpyKind kind)
{
	/* Before GMM is set up there is nothing to manage: pass straight through. */
	if (!initialized) {
		gprint(WARN, "cudaMemcpy called outside GMM\n");
		return nv_cudaMemcpy(dst, src, count, kind);
	}

	switch (kind) {
	case cudaMemcpyHostToDevice:
		return gmm_cudaMemcpyHtoD(dst, src, count);
	case cudaMemcpyDeviceToHost:
		return gmm_cudaMemcpyDtoH(dst, src, count);
	case cudaMemcpyDeviceToDevice:
		return gmm_cudaMemcpyDtoD(dst, src, count);
	default:
		break;
	}

	/*
	 * Every remaining kind lands here; the warning text only mentions
	 * host-to-host, but the fallback is taken for all of them.
	 */
	gprint(WARN, "HtoH memory copy not supported by GMM\n");
	return nv_cudaMemcpy(dst, src, count, kind);
}
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) { static cudaError_t (*nv_cudaMemcpy)(void *, const void *, size_t, enum cudaMemcpyKind) = NULL; cudaError_t ret; struct timeval t; if(!nv_cudaMemcpy) { nv_cudaMemcpy = dlsym(RTLD_NEXT, "cudaMemcpy"); if(!nv_cudaMemcpy) { fprintf(stderr, "failed to find symbol cudaMemcpy: %s\n", dlerror()); return cudaErrorSharedObjectSymbolNotFound; } } gettimeofday(&t, NULL); printf("[gvm] %lf intercepting cudaMemcpy\n", t.tv_sec + t.tv_usec / 1000000.0); ret = nv_cudaMemcpy(dst, src, count, kind); cudaThreadSynchronize(); gettimeofday(&t, NULL); printf("[gvm] %lf intercepted cudaMemcpy( %lx %lx %ld %d ) = %d\n", t.tv_sec + t.tv_usec / 1000000.0, (unsigned long)dst, (unsigned long)src, count, kind, (int)ret); return ret; }