/*
 * Entry point for cudaConfigureCall, declared with GMM_EXPORT.
 *
 * When the file-level flag `initialized` is set, the call is handed to
 * gmm_cudaConfigureCall. Otherwise a warning is logged through gprint and
 * the call is forwarded to nv_cudaConfigureCall instead.
 * NOTE(review): nv_cudaConfigureCall is presumably the vendor runtime's
 * implementation resolved elsewhere in this file -- confirm.
 *
 * All four arguments are passed through unchanged on either path.
 *
 * Returns whatever the selected callee returns.
 */
GMM_EXPORT
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream)
{
    if (!initialized) {
        /* Called before (or without) GMM being set up: bypass GMM entirely. */
        gprint(WARN, "cudaConfigureCall called outside GMM\n");
        return nv_cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
    }

    return gmm_cudaConfigureCall(gridDim, blockDim, sharedMem, stream);
}
/*
 * Interposed cudaConfigureCall that redirects every launch configuration
 * onto the file-level stream `mystream`.
 *
 * On first use, the next definitions of cudaStreamCreate and
 * cudaConfigureCall in the symbol search order are resolved with
 * dlsym(RTLD_NEXT, ...). `mystream` is created through the resolved
 * cudaStreamCreate if it is still unset.
 *
 * Parameters:
 *   gridDim, blockDim, sharedMem - forwarded unchanged.
 *   stream                       - currently ignored; `mystream` is always
 *                                  passed instead (see the note in the body).
 *
 * Returns:
 *   cudaErrorSharedObjectSymbolNotFound if either symbol cannot be resolved;
 *   the error reported by stream creation if `mystream` cannot be created;
 *   otherwise the result of the resolved cudaConfigureCall.
 */
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream)
{
    static cudaError_t (*nv_cudaConfigureCall)(dim3, dim3, size_t, cudaStream_t) = NULL;
    cudaError_t ret;

    /* nv_cudaStreamCreate is a file-level pointer shared with other wrappers. */
    if (!nv_cudaStreamCreate) {
        nv_cudaStreamCreate = dlsym(RTLD_NEXT, "cudaStreamCreate");
        if (!nv_cudaStreamCreate) {
            fprintf(stderr, "failed to find symbol cudaStreamCreate : %s\n", dlerror());
            return cudaErrorSharedObjectSymbolNotFound;
        }
    }

    if (!mystream) {
        ret = nv_cudaStreamCreate(&mystream);
        /*
         * Previously this result was overwritten by the call below, so a
         * failed creation went unnoticed and the launch was configured with
         * an unset stream handle. Report the failure to the caller instead.
         */
        if (ret != cudaSuccess) {
            fprintf(stderr, "failed to create stream for cudaConfigureCall\n");
            return ret;
        }
    }

    if (!nv_cudaConfigureCall) {
        nv_cudaConfigureCall = dlsym(RTLD_NEXT, "cudaConfigureCall");
        if (!nv_cudaConfigureCall) {
            fprintf(stderr, "failed to find symbol cudaConfigureCall: %s\n", dlerror());
            return cudaErrorSharedObjectSymbolNotFound;
        }
    }

    /*
     * The caller's `stream` is deliberately not consulted. The original
     * author left a note that substituting `mystream` only when
     * `stream == 0` (and passing `stream` through otherwise) produced a
     * segmentation fault, so that conditional was disabled and `mystream`
     * is used unconditionally.
     */
    ret = nv_cudaConfigureCall(gridDim, blockDim, sharedMem, mystream);

    return ret;
}
/*
 * Tracing interposer for cudaConfigureCall.
 *
 * Resolves the next definition of cudaConfigureCall in the symbol search
 * order via dlsym(RTLD_NEXT, ...) on first use and caches it in a
 * function-local static. Each call prints a wall-clock timestamp (seconds,
 * from gettimeofday) to stdout immediately before and immediately after
 * forwarding to the resolved function.
 *
 * All four arguments are forwarded unchanged.
 *
 * Returns cudaErrorSharedObjectSymbolNotFound if the symbol cannot be
 * resolved (nothing is traced in that case); otherwise the value returned
 * by the resolved function.
 */
cudaError_t cudaConfigureCall(dim3 gridDim, dim3 blockDim, size_t sharedMem, cudaStream_t stream)
{
    static cudaError_t (*real_configure)(dim3, dim3, size_t, cudaStream_t) = NULL;
    struct timeval now;
    double stamp;
    cudaError_t status;

    /* Lazy one-time lookup of the forwarded implementation. */
    if (real_configure == NULL) {
        real_configure = dlsym(RTLD_NEXT, "cudaConfigureCall");
        if (real_configure == NULL) {
            fprintf(stderr, "failed to find symbol cudaConfigureCall: %s\n", dlerror());
            return cudaErrorSharedObjectSymbolNotFound;
        }
    }

    gettimeofday(&now, NULL);
    stamp = now.tv_sec + now.tv_usec / 1000000.0;
    printf("[gvm] %lf intercepting cudaConfigureCall\n", stamp);

    status = real_configure(gridDim, blockDim, sharedMem, stream);

    gettimeofday(&now, NULL);
    stamp = now.tv_sec + now.tv_usec / 1000000.0;
    printf("[gvm] %lf intercepted cudaConfigureCall\n", stamp);

    return status;
}