// Ensure `buf` has a CUDA device allocation.
//
// If buf->dev is already non-zero the buffer is left untouched. Otherwise
// __buf_size(user_context, buf) bytes are requested through cuMemAlloc and
// the resulting device pointer is stored in buf->dev, which is then asserted
// to be non-zero.
//
//   user_context  opaque pointer forwarded to every runtime helper
//   buf           buffer whose `dev` field is filled in
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    if (buf->dev) {
        // This buffer already has a device allocation
        return;
    }

    size_t size = __buf_size(user_context, buf);

    #ifdef DEBUG
    // Every argument is cast to long long so that it matches its %lld
    // conversion exactly, independent of the width of size_t or of the
    // extent/stride fields on the target platform.
    halide_printf(user_context, "dev_malloc allocating buffer of %lld bytes, "
                  "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                  (long long)size,
                  (long long)buf->extent[0], (long long)buf->extent[1],
                  (long long)buf->extent[2], (long long)buf->extent[3],
                  (long long)buf->stride[0], (long long)buf->stride[1],
                  (long long)buf->stride[2], (long long)buf->stride[3],
                  buf->elem_size);
    #endif

    // Start from a null handle: if the allocation call leaves `p` untouched,
    // the assert below sees 0 instead of an indeterminate value.
    CUdeviceptr p = 0;
    TIME_CALL( cuMemAlloc(&p, size), "dev_malloc");
    buf->dev = (uint64_t)p;
    halide_assert(user_context, buf->dev);

    #ifdef DEBUG
    halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    #endif
}
// Upload the host copy of `buf` to its OpenCL device allocation when the
// host side is marked dirty, then clear the host_dirty flag.
//
// Both buf->host and buf->dev must already be set; this is asserted before
// any data is transferred.
//
//   user_context  opaque pointer forwarded to the runtime helpers
//   buf           buffer to synchronise
WEAK void halide_copy_to_dev(void *user_context, buffer_t* buf) {
    if (buf->host_dirty) {
        halide_assert(user_context, buf->host && buf->dev);
        size_t nbytes = __buf_size(user_context, buf);

        #ifdef DEBUG
        halide_printf(user_context, "copy_to_dev (%lld bytes) %p -> %p\n",
                      (long long)nbytes, buf->host, (void*)buf->dev);
        #endif

        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));

        // buf->dev carries the cl_mem handle as an integer; recover it here.
        cl_mem dev_mem = (cl_mem)((void*)buf->dev);

        // CL_TRUE is passed as the blocking flag of the write.
        int err = clEnqueueWriteBuffer(*cl_q, dev_mem, CL_TRUE, 0, nbytes,
                                       buf->host, 0, NULL, NULL);
        CHECK_ERR( err, "clEnqueueWriteBuffer" );
    }

    buf->host_dirty = false;
}
// Download the CUDA device copy of `buf` into host memory when the device
// side is marked dirty, then clear the dev_dirty flag.
//
// buf->dev and buf->host are each asserted to be set before the copy.
//
//   user_context  opaque pointer forwarded to the runtime helpers
//   buf           buffer to synchronise
WEAK void halide_copy_to_host(void *user_context, buffer_t* buf) {
    if (buf->dev_dirty) {
        halide_assert(user_context, buf->dev);
        halide_assert(user_context, buf->host);

        size_t nbytes = __buf_size(user_context, buf);

        #ifdef DEBUG
        // NOTE(review): `msg` only exists in DEBUG builds, yet it is named in
        // the TIME_CALL below unconditionally; presumably the non-DEBUG
        // TIME_CALL discards its second argument -- confirm.
        char msg[256];
        snprintf(msg, sizeof(msg), "copy_to_host (%zu bytes) %p -> %p",
                 nbytes, (void*)buf->dev, buf->host );
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
        #endif

        TIME_CALL( cuMemcpyDtoH(buf->host, buf->dev, nbytes), msg );
    }

    buf->dev_dirty = false;
}
// Upload the host copy of `buf` to its CUDA device allocation when the host
// side is marked dirty, then clear the host_dirty flag.
//
// Both buf->host and buf->dev are asserted to be set before the copy.
//
//   user_context  opaque pointer forwarded to the runtime helpers
//   buf           buffer to synchronise
WEAK void halide_copy_to_dev(void *user_context, buffer_t* buf) {
    if (buf->host_dirty) {
        halide_assert(user_context, buf->host && buf->dev);

        size_t nbytes = __buf_size(user_context, buf);

        #ifdef DEBUG
        // NOTE(review): `msg` only exists in DEBUG builds, yet it is named in
        // the TIME_CALL below unconditionally; presumably the non-DEBUG
        // TIME_CALL discards its second argument -- confirm.
        char msg[256];
        snprintf(msg, sizeof(msg), "copy_to_dev (%zu bytes) %p -> %p (t=%lld)",
                 nbytes, buf->host, (void*)buf->dev,
                 (long long)halide_current_time_ns(user_context) );
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
        #endif

        TIME_CALL( cuMemcpyHtoD(buf->dev, buf->host, nbytes), msg );
    }

    buf->host_dirty = false;
}
// Download the OpenCL device copy of `buf` into host memory when the device
// side is marked dirty, then clear the dev_dirty flag.
//
// The command queue is drained with clFinish first, and buf->host / buf->dev
// are asserted to be set before any data is transferred.
//
//   user_context  opaque pointer forwarded to the runtime helpers
//   buf           buffer to synchronise
WEAK void halide_copy_to_host(void *user_context, buffer_t* buf) {
    if (buf->dev_dirty) {
        // Block on completion of queued work before reading back.
        clFinish(*cl_q);

        halide_assert(user_context, buf->host && buf->dev);
        size_t size = __buf_size(user_context, buf);

        #ifdef DEBUG
        halide_printf(user_context, "copy_to_host buf %p (%lld bytes) %p -> %p\n",
                      buf, (long long)size, (void*)buf->dev, buf->host );
        #endif

        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf, size));

        // buf->dev carries the cl_mem handle as an integer; recover it here.
        cl_mem dev_mem = (cl_mem)((void*)buf->dev);

        // CL_TRUE is passed as the blocking flag of the read.
        int err = clEnqueueReadBuffer(*cl_q, dev_mem, CL_TRUE, 0, size,
                                      buf->host, 0, NULL, NULL);
        CHECK_ERR( err, "clEnqueueReadBuffer" );
    }

    buf->dev_dirty = false;
}
// Upload the host copy of `buf` to its OpenCL device allocation when the
// host side is marked dirty, then clear the host_dirty flag.
//
// Both buf->host and buf->dev are asserted to be set before the transfer.
//
//   buf  buffer to synchronise
WEAK void halide_copy_to_dev(buffer_t* buf) {
    if (buf->host_dirty) {
        halide_assert(buf->host && buf->dev);
        size_t size = __buf_size(buf);

        // `msg` is handed to CHECK_ERR and TIME_CHECK below in every build
        // configuration, so it has to be declared in every configuration too:
        // a descriptive message in DEBUG builds, an empty string otherwise.
        #ifdef DEBUG
        char msg[256];
        snprintf(msg, 256, "copy_to_dev (%lld bytes) %p -> %p (t=%lld)",
                 (long long)size, buf->host, (void*)buf->dev,
                 (long long)halide_current_time_ns() );
        #else
        char msg[1] = { 0 };
        #endif

        halide_assert(halide_validate_dev_pointer(buf));

        TIME_START();
        // CL_TRUE is passed as the blocking flag of the write.
        int err = clEnqueueWriteBuffer( cl_q, (cl_mem)((void*)buf->dev),
                                        CL_TRUE, 0, size, buf->host, 0, NULL, NULL );
        CHECK_ERR( err, msg );
        TIME_CHECK(msg);
    }
    buf->host_dirty = false;
}
// Ensure `buf` has a device allocation.
//
// If buf->dev is already non-zero it is only validated (via an assert on
// halide_validate_dev_pointer) and left untouched. Otherwise
// __buf_size(buf) bytes are obtained from __dev_malloc, stored in buf->dev,
// and the result is asserted to be non-zero.
//
//   buf  buffer whose `dev` field is filled in
WEAK void halide_dev_malloc(buffer_t* buf) {
    if (buf->dev) {
        halide_assert(halide_validate_dev_pointer(buf));
        return;
    }

    size_t size = __buf_size(buf);

    #ifdef DEBUG
    // Every argument is cast to long long so that it matches its %lld
    // conversion exactly, independent of the width of size_t or of the
    // extent/stride fields on the target platform.
    halide_printf("dev_malloc allocating buffer of %lld bytes, "
                  "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                  (long long)size,
                  (long long)buf->extent[0], (long long)buf->extent[1],
                  (long long)buf->extent[2], (long long)buf->extent[3],
                  (long long)buf->stride[0], (long long)buf->stride[1],
                  (long long)buf->stride[2], (long long)buf->stride[3],
                  buf->elem_size);
    #endif

    buf->dev = (uint64_t)__dev_malloc(size);
    halide_assert(buf->dev);
}
// Download the OpenCL device copy of `buf` into host memory when the device
// side is marked dirty, then clear the dev_dirty flag.
//
// The command queue is drained with clFinish first, and buf->host / buf->dev
// are asserted to be set before any data is transferred.
//
//   buf  buffer to synchronise
WEAK void halide_copy_to_host(buffer_t* buf) {
    if (buf->dev_dirty) {
        // Block on completion of queued work before reading back.
        clFinish(cl_q);

        halide_assert(buf->host && buf->dev);
        size_t size = __buf_size(buf);

        // `msg` is consumed by CHECK_ERR / TIME_CHECK in every build, so a
        // placeholder empty string stands in for it outside DEBUG builds.
        #ifdef DEBUG
        char msg[256];
        snprintf(msg, sizeof(msg), "copy_to_host (%lld bytes) %p -> %p",
                 (long long)size, (void*)buf->dev, buf->host );
        #else
        char msg[1] = { 0 };
        #endif

        halide_assert(halide_validate_dev_pointer(buf, size));

        TIME_START();

        #ifdef DEBUG
        halide_printf("%s\n", msg);
        #endif

        // buf->dev carries the cl_mem handle as an integer; recover it here.
        cl_mem dev_mem = (cl_mem)((void*)buf->dev);

        // CL_TRUE is passed as the blocking flag of the read.
        int err = clEnqueueReadBuffer(cl_q, dev_mem, CL_TRUE, 0, size,
                                      buf->host, 0, NULL, NULL);
        CHECK_ERR( err, msg );
        TIME_CHECK(msg);
    }

    buf->dev_dirty = false;
}
// Ensure `buf` has a device allocation.
//
// A buffer whose `dev` field is already non-zero is only validated (via an
// assert on halide_validate_dev_pointer). Otherwise
// __buf_size(user_context, buf) bytes are obtained from __dev_malloc, the
// result is stored in buf->dev, and it is asserted to be non-zero.
//
//   user_context  opaque pointer forwarded to the runtime helpers
//   buf           buffer whose `dev` field is filled in
WEAK void halide_dev_malloc(void *user_context, buffer_t* buf) {
    if (!buf->dev) {
        size_t nbytes = __buf_size(user_context, buf);

        #ifdef DEBUG
        halide_printf(user_context, "dev_malloc allocating buffer of %lld bytes, "
                      "extents: %lldx%lldx%lldx%lld strides: %lldx%lldx%lldx%lld (%d bytes per element)\n",
                      (long long)nbytes,
                      (long long)buf->extent[0], (long long)buf->extent[1],
                      (long long)buf->extent[2], (long long)buf->extent[3],
                      (long long)buf->stride[0], (long long)buf->stride[1],
                      (long long)buf->stride[2], (long long)buf->stride[3],
                      buf->elem_size);
        #endif

        buf->dev = (uint64_t)__dev_malloc(user_context, nbytes);

        #ifdef DEBUG
        halide_printf(user_context, "dev_malloc allocated buffer %p of with buf->dev of %p\n",
                      buf, (void *)buf->dev);
        #endif

        halide_assert(user_context, buf->dev);
    } else {
        // Already allocated: just sanity-check the existing device pointer.
        halide_assert(user_context, halide_validate_dev_pointer(user_context, buf));
    }
}