std::chrono::duration<double, std::milli> run_halide(Halide::Buffer<uint8_t> &in, Halide::Buffer<uint8_t> &y, Halide::Buffer<uint8_t> &u, Halide::Buffer<uint8_t> &v) { in(0, 0, 0) = in(0, 0, 0); auto start = std::chrono::high_resolution_clock::now(); rgbyuv420gpu_ref(in.raw_buffer(), y.raw_buffer(), u.raw_buffer(), v.raw_buffer()); halide_copy_to_host(nullptr, y.raw_buffer()); halide_copy_to_host(nullptr, u.raw_buffer()); halide_copy_to_host(nullptr, v.raw_buffer()); halide_device_free(nullptr, y.raw_buffer()); halide_device_free(nullptr, u.raw_buffer()); halide_device_free(nullptr, v.raw_buffer()); halide_device_free(nullptr, in.raw_buffer()); auto end = std::chrono::high_resolution_clock::now(); return end - start; }
/** Release the storage held by this cache entry: the key first, then, for
 * each of the tuple_count buffers, its device allocation followed by its
 * host allocation. */
WEAK void CacheEntry::destroy() {
    halide_free(NULL, key);

    int32_t index = 0;
    while (index < tuple_count) {
        halide_device_free(NULL, &buffer(index));
        // The pointer given to halide_free is the buffer's host pointer
        // moved back by extra_bytes_host_bytes.
        halide_free(NULL, buffer(index).host - extra_bytes_host_bytes);
        index++;
    }
}
/** Destroy a BufferContents: free its device allocation, its host
 * allocation, and the contents object itself.
 *
 * A failure to free the device allocation is still reported through
 * user_assert, but only after the host-side storage has been released, so
 * that an assertion that unwinds (rather than aborts) does not leak
 * p->allocation or *p.
 *
 * \param p contents object to destroy; must not be used afterwards.
 */
EXPORT void destroy<BufferContents>(const BufferContents *p) {
    // Must happen before `delete p`: the buffer_t lives inside *p.
    const int error = halide_device_free(NULL, const_cast<buffer_t *>(&p->buf));

    // Release host-side resources unconditionally before reporting.
    free(p->allocation);
    delete p;

    user_assert(!error) << "Failed to free device buffer\n";
}
/** Release the storage held by this cache entry: the key first, then, for
 * each of the tuple_count buffers, its device allocation followed by the
 * host allocation located via get_pointer_to_header. */
WEAK void CacheEntry::destroy() {
    halide_free(NULL, key);

    uint32_t slot = 0;
    while (slot < tuple_count) {
        halide_device_free(NULL, &buffer(slot));
        // What gets freed is whatever get_pointer_to_header returns for the
        // buffer's host pointer, not the host pointer itself.
        halide_free(NULL, get_pointer_to_header(buffer(slot).host));
        ++slot;
    }
}
/** Destroy a BufferContents: free its device allocation (best effort), its
 * host allocation, and the contents object itself.
 *
 * \param p contents object to destroy; must not be used afterwards.
 */
EXPORT void destroy<BufferContents>(const BufferContents *p) {
    buffer_t *raw = const_cast<buffer_t *>(&p->buf);

    // The result is intentionally discarded: this may run while cleaning up
    // after an earlier error, and asserting here would re-raise it.
    halide_device_free(nullptr, raw);

    free(p->allocation);
    delete p;
}
/** Release the storage held by this cache entry: for each of the
 * tuple_count buffers, its device allocation and then the host allocation
 * located via get_pointer_to_header; finally the metadata storage. */
WEAK void CacheEntry::destroy() {
    uint32_t slot = 0;
    while (slot < tuple_count) {
        halide_device_free(NULL, &buf[slot]);
        halide_free(NULL, get_pointer_to_header(buf[slot].host));
        ++slot;
    }

    // Freed last, after every buf[] element has been visited.
    halide_free(NULL, metadata_storage);
}
/** Default combined device-and-host free for a legacy buffer_t.
 *
 * Frees the device allocation with halide_device_free, frees the host
 * allocation (when there is one) with halide_free, nulls the host pointer,
 * and clears both dirty flags. Host cleanup happens whether or not the
 * device free succeeded.
 *
 * \param user_context      forwarded to halide_device_free / halide_free.
 * \param buf               buffer whose storage is released.
 * \param device_interface  not used by this implementation.
 * \return the result of halide_device_free.
 */
WEAK int halide_default_device_and_host_free(void *user_context, struct buffer_t *buf,
                                             const halide_device_interface *device_interface) {
    int result = halide_device_free(user_context, buf);

    // Do not hand a NULL pointer to halide_free; a buffer may have no host
    // allocation at all.
    if (buf->host) {
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }

    buf->host_dirty = false;
    buf->dev_dirty = false;
    return result;
}
/** Free the device allocation of a legacy buffer_t.
 *
 * Builds a temporary halide_buffer_t (with room for four dimensions) from
 * old_buf, calls halide_device_free on it, then writes the device fields
 * back into old_buf. The steps run in order and stop at the first failure.
 *
 * \param user_context forwarded to every helper called here.
 * \param old_buf      legacy buffer whose device allocation is freed.
 * \return 0 on success; otherwise the non-zero code returned by the first
 *         step that failed (the code is passed through unchanged rather
 *         than being collapsed to 1).
 */
WEAK int halide_device_free_legacy(void *user_context, struct buffer_t *old_buf) {
    halide_buffer_t new_buf = {0};
    halide_dimension_t shape[4];
    new_buf.dim = shape;

    int err = guess_type_and_dimensionality(user_context, old_buf, &new_buf);
    if (err == 0) {
        err = halide_upgrade_buffer_t(user_context, "", old_buf, &new_buf, /*bounds_query_only*/ 0);
    }
    if (err == 0) {
        err = halide_device_free(user_context, &new_buf);
    }
    if (err == 0) {
        err = halide_downgrade_buffer_t_device_fields(user_context, "", &new_buf, old_buf);
    }
    return err;
}
/** Default combined device-and-host free for a halide_buffer_t.
 *
 * Frees the device allocation, then the host allocation if one is present
 * (nulling the host pointer), and finally clears both dirty flags.
 *
 * \param user_context      forwarded to halide_device_free / halide_free.
 * \param buf               buffer whose storage is released.
 * \param device_interface  not used by this implementation.
 * \return the result of halide_device_free.
 */
WEAK int halide_default_device_and_host_free(void *user_context, struct halide_buffer_t *buf,
                                             const halide_device_interface_t *device_interface) {
    const int device_status = halide_device_free(user_context, buf);

    if (buf->host != NULL) {
        halide_free(user_context, buf->host);
        buf->host = NULL;
    }

    // Nothing is left on either side, so neither copy can be stale.
    buf->set_host_dirty(false);
    buf->set_device_dirty(false);

    return device_status;
}
/** Copy image data from host memory to device memory. This should not be
 * called directly; Halide handles copying to the device automatically.
 *
 * The whole operation runs under device_copy_mutex. Steps, in order:
 *  - if device_interface is NULL, fall back to the interface already stored
 *    on the buffer (error if there is none);
 *  - if the buffer has a device allocation under a different interface,
 *    copy it back to the host when the device copy is dirty, free it, and
 *    mark the host dirty;
 *  - allocate device memory when the buffer has none;
 *  - if the host is dirty, copy it to the device and clear host_dirty.
 *
 * \return 0 on success; halide_error_code_no_device_interface when no
 *         interface is available; the failing helper's code when the
 *         copy-back, free, or malloc step fails;
 *         halide_error_code_copy_to_device_failed when both sides are dirty
 *         or the interface's copy_to_device fails.
 */
WEAK int halide_copy_to_device(void *user_context, struct halide_buffer_t *buf,
                               const halide_device_interface_t *device_interface) {
    ScopedMutexLock lock(&device_copy_mutex);
    int status = 0;

    debug(user_context)
        << "halide_copy_to_device " << buf
        << ", host: " << buf->host
        << ", dev: " << buf->device
        << ", host_dirty: " << buf->host_dirty()
        << ", dev_dirty: " << buf->device_dirty() << "\n";

    // Resolve which interface to use.
    if (device_interface == NULL) {
        debug(user_context) << "halide_copy_to_device " << buf << " interface is NULL\n";
        device_interface = buf->device_interface;
        if (device_interface == NULL) {
            debug(user_context) << "halide_copy_to_device " << buf << " no interface error\n";
            return halide_error_code_no_device_interface;
        }
    }

    // The buffer currently lives on a different device: move it off first.
    const bool on_other_device = buf->device && buf->device_interface != device_interface;
    if (on_other_device) {
        debug(user_context) << "halide_copy_to_device " << buf << " flipping buffer to new device\n";

        if (buf->device_interface != NULL && buf->device_dirty()) {
            halide_assert(user_context, !buf->host_dirty());
            status = copy_to_host_already_locked(user_context, buf);
            if (status != 0) {
                debug(user_context) << "halide_copy_to_device " << buf << " flipping buffer halide_copy_to_host failed\n";
                return status;
            }
        }

        status = halide_device_free(user_context, buf);
        if (status != 0) {
            debug(user_context) << "halide_copy_to_device " << buf << " flipping buffer halide_device_free failed\n";
            return status;
        }

        // Force the copy to the new device further down.
        buf->set_host_dirty(true);
    }

    // Make sure there is a device allocation to copy into.
    if (buf->device == 0) {
        status = halide_device_malloc(user_context, buf, device_interface);
        if (status != 0) {
            debug(user_context) << "halide_copy_to_device " << buf << " halide_copy_to_device call to halide_device_malloc failed\n";
            return status;
        }
    }

    // Nothing to upload when the host copy is clean.
    if (!buf->host_dirty()) {
        return 0;
    }

    debug(user_context) << "halide_copy_to_device " << buf << " host is dirty\n";

    // Both sides dirty: there is no single up-to-date copy to upload.
    if (buf->device_dirty()) {
        debug(user_context) << "halide_copy_to_device " << buf << " dev_dirty is true error\n";
        return halide_error_code_copy_to_device_failed;
    }

    status = device_interface->copy_to_device(user_context, buf);
    if (status != 0) {
        debug(user_context) << "halide_copy_to_device " << buf << "device copy_to_device returned an error\n";
        return halide_error_code_copy_to_device_failed;
    }

    buf->set_host_dirty(false);
    return 0;
}
/** Free any device memory associated with a halide_buffer_t and ignore any
 * error. Used when freeing as a destructor on an error.
 *
 * \param user_context forwarded to halide_device_free.
 * \param obj          pointer to the halide_buffer_t, passed as void *.
 */
WEAK void halide_device_free_as_destructor(void *user_context, void *obj) {
    // The status is deliberately dropped; this function returns nothing.
    (void)halide_device_free(user_context, static_cast<struct halide_buffer_t *>(obj));
}
/** Weak-linkage forwarder to halide_device_free.
 *
 * \param user_context forwarded unchanged.
 * \param buf          buffer whose device allocation is freed.
 * \return whatever halide_device_free returns.
 */
WEAK int halide_weak_device_free(void *user_context, struct halide_buffer_t *buf) {
    const int status = halide_device_free(user_context, buf);
    return status;
}
int main(int argc, char** argv) { LOGI("\nvvvv vvvv vvvv"); int width = 128; int height = 128; int channels = 4; int32_t *input = (int32_t*)malloc(width * height * channels * sizeof(int32_t)); int32_t *output = (int32_t*)malloc(width * height * channels * sizeof(int32_t)); LOGI("Allocated memory for %dx%dx%d image", width, height, channels); buffer_t bt_input = make_interleaved_image(width, height, channels, input); for (int i = 0; i < std::min(bt_input.extent[0], width); i++) { for (int j = 0; j < std::min(bt_input.extent[1], height); j++) { for (int k = 0; k < bt_input.extent[2]; k++) { input[i * bt_input.stride[0] + j * bt_input.stride[1] + k * bt_input.stride[2]] = ((i + j) % 2) * 6; } } } LOGI("Input :\n"); print(bt_input); bt_input.host_dirty = true; buffer_t bt_output = make_interleaved_image(width, height, channels, output); two_kernels_filter(&bt_input, &bt_output); LOGI("Filter is done."); halide_device_sync(NULL, &bt_output); LOGI("Sync is done"); halide_copy_to_host(NULL, &bt_output); LOGI("Output :\n"); print(bt_output); int count_mismatches = 0; for (int i = 0; i < bt_output.extent[0]; i++) { for (int j = 0; j < bt_output.extent[1]; j++) { for (int k = 0; k < bt_output.extent[2]; k++) { int32_t output_value = ((int32_t*)bt_output.host)[i * bt_output.stride[0] + j * bt_output.stride[1] + k * bt_output.stride[2]]; int32_t input_value = ((int32_t*)bt_input.host)[i * bt_input.stride[0] + j * bt_input.stride[1] + k * bt_input.stride[2]]; if (output_value != input_value) { if (count_mismatches < 100) { std::ostringstream str; str << "bt_output and bt_input results differ at " << "(" << i << ", " << j << ", " << k << "):" << output_value << " != " << input_value << "\n"; LOGI("%s", str.str().c_str()); } count_mismatches++; } } } } LOGI(count_mismatches == 0? 
"Test passed.\n": "Test failed.\n"); halide_device_free(NULL, &bt_input); halide_device_free(NULL, &bt_output); halide_device_release(NULL, halide_openglcompute_device_interface()); LOGI("^^^^ ^^^^ ^^^^\n"); }
/** Free the device allocation behind this buffer.
 *
 * \return whatever halide_device_free returns for raw_buffer().
 */
int Buffer::free_dev_buffer() {
    const int status = halide_device_free(NULL, raw_buffer());
    return status;
}