/*
 * Interposed cudaMemcpyAsync.
 *
 * Forwards the call to real_cudaMemcpyAsync. When tracing is active
 * (mpitrace_on is set and Extrae_get_trace_CUDA() returns non-zero) the
 * forwarded call is bracketed by Extrae_cudaMemcpyAsync_Enter /
 * Extrae_cudaMemcpyAsync_Exit.
 *
 * Parameters p1..p5 are passed through unchanged, in order, to the real
 * function (destination, source, byte count, copy kind, stream).
 *
 * Returns the cudaError_t produced by real_cudaMemcpyAsync.
 *
 * If real_cudaMemcpyAsync is NULL the process prints a diagnostic to stderr
 * and terminates with a failure exit status.
 */
cudaError_t cudaMemcpyAsync (void *p1, const void *p2, size_t p3, enum cudaMemcpyKind p4, cudaStream_t p5)
{
	cudaError_t res;

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME": THREAD %d cudaMemcpyAsync is at %p\n", THREADID, real_cudaMemcpyAsync);
	/* size_t needs %zu; the enum is printed as int and the stream handle as
	   a pointer so that every conversion matches its argument type. */
	fprintf (stderr, PACKAGE_NAME": THREAD %d cudaMemcpyAsync params %p %p %zu %d %p\n", THREADID, p1, p2, p3, (int) p4, (void *) p5);
#endif

	/* Check for the missing symbol first so that the fatal path depends only
	   on the pointer, never on the state of the tracing switches. */
	if (real_cudaMemcpyAsync == NULL)
	{
		fprintf (stderr, "Unable to find cudaMemcpyAsync in DSOs!! Dying...\n");
		/* This is an error exit: do not report success to the parent. */
		exit (EXIT_FAILURE);
	}

	/* Evaluate the tracing predicate exactly once; evaluating it twice could
	   yield two different answers and select neither forwarding branch. */
	if (mpitrace_on && Extrae_get_trace_CUDA())
	{
		Extrae_cudaMemcpyAsync_Enter (p1, p2, p3, p4, p5);
		res = real_cudaMemcpyAsync (p1, p2, p3, p4, p5);
		Extrae_cudaMemcpyAsync_Exit ();
	}
	else
	{
		res = real_cudaMemcpyAsync (p1, p2, p3, p4, p5);
	}

	return res;
}
/*
 * Interposed cudaMemcpyAsync.
 *
 * Forwards all arguments to real_cudaMemcpyAsync, then passes the same
 * arguments to handle_cudaMemcpy. The value returned to the caller is the
 * status of the real call.
 */
cudaError_t cudaMemcpyAsync(void *dst, const void * src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream)
{
    /* Issue the real copy first; its status is what the caller receives. */
    const cudaError_t status = real_cudaMemcpyAsync(dst, src, count, kind, stream);

    /* The hook runs unconditionally, whatever status the real call returned. */
    handle_cudaMemcpy(dst, src, count, kind, stream);

    return status;
}
/*
 * Interposed cudaMemcpyAsync that logs each call to stderr.
 *
 * Writes one "TRACE: cudaMemcpyAsync ..." line containing the destination
 * pointer, source pointer, byte count, copy kind and stream handle, then
 * forwards the call to real_cudaMemcpyAsync and returns its result.
 *
 * NOTE(review): src is declared non-const here, whereas the CUDA runtime
 * prototype takes `const void *src`. Left unchanged to preserve this
 * block's interface -- confirm against the headers this file builds with.
 */
cudaError_t cudaMemcpyAsync(void *dst, void *src, size_t size, enum cudaMemcpyKind kind, cudaStream_t stream)
{
    /* Macro defined outside this view; presumably it makes
       real_cudaMemcpyAsync available before the call below -- TODO confirm. */
    CUDATRACE_INTERPOSE(cudaMemcpyAsync);

    /* %zu is the portable conversion for size_t; the enum is printed as int
       and the stream handle is cast to void * as %p requires. */
    fprintf(stderr, "TRACE: cudaMemcpyAsync %p %p %zu %d %p\n",
            dst, src, size, (int)kind, (void *)stream);

    return real_cudaMemcpyAsync(dst, src, size, kind, stream);
}