int halide_noos_set_runtime(halide_malloc_t user_malloc, halide_free_t user_free, halide_print_t print, halide_error_handler_t error, halide_do_par_for_t do_par_for, halide_do_task_t do_task, halide_get_symbol_t get_symbol, halide_load_library_t load_library, halide_get_library_symbol_t get_library_symbol) { halide_set_custom_malloc(user_malloc); halide_set_custom_free(user_free); halide_set_custom_print(print); halide_set_error_handler(error); halide_set_custom_do_par_for(do_par_for); halide_set_custom_do_task(do_task); custom_load_library = load_library; custom_get_symbol = get_symbol; custom_get_library_symbol = get_library_symbol; return 0; }
// JNI entry point for CameraPreview.processFrame (width/height variant).
//
// Runs the Halide pipeline `halide_generated` over the first w*h bytes of jSrc
// (described to Halide as a w x h plane of 1-byte elements), writing into the
// locked buffer of the window backing `surf`. The (w*h)/2 bytes that follow the
// first plane are copied to the destination unchanged. The elapsed time of the
// pipeline call and the minimum of a 16-entry ring of recent timings are
// logged.
//
// Parameters:
//   env, obj - standard JNI arguments (obj is not used).
//   jSrc     - byte array holding the source frame.
//   j_w, j_h - frame width and height.
//   surf     - Java Surface whose native window receives the output.
//
// Every early-exit path releases whatever has been acquired so far (the pinned
// byte array and the native window reference).
JNIEXPORT void JNICALL Java_com_example_hellohalide_CameraPreview_processFrame(
    JNIEnv *env, jobject obj, jbyteArray jSrc, jint j_w, jint j_h, jobject surf) {

    const int w = j_w, h = j_h;

    halide_set_error_handler(handler);

    unsigned char *src = (unsigned char *)env->GetByteArrayElements(jSrc, NULL);
    if (!src) {
        LOGD("src is null\n");
        return;
    }

    // ANativeWindow_fromSurface returns a reference that is already acquired,
    // so no additional ANativeWindow_acquire is needed; the single
    // ANativeWindow_release below balances it.
    ANativeWindow *win = ANativeWindow_fromSurface(env, surf);
    if (!win) {
        LOGD("ANativeWindow_fromSurface returned null\n");
        // src was only read, so there is nothing to copy back.
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    // State kept across frames: one-time setup flag and timing ring buffer.
    static bool first_call = true;
    static unsigned counter = 0;
    static unsigned times[16];
    if (first_call) {
        LOGD("According to Halide, host system has %d cpus\n", halide_host_cpu_count());
        LOGD("Resetting buffer format");
        ANativeWindow_setBuffersGeometry(win, w, h, 0);
        first_call = false;
        for (int t = 0; t < 16; t++) {
            times[t] = 0;
        }
    }

    ANativeWindow_Buffer buf;
    if (int err = ANativeWindow_lock(win, &buf, NULL)) {
        LOGD("ANativeWindow_lock failed with error code %d\n", err);
        // Do not leak the window reference or the pinned array on failure.
        ANativeWindow_release(win);
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    uint8_t *dst = (uint8_t *)buf.bits;

    // If we're using opencl, use the gpu backend for it.
    halide_set_ocl_device_type("gpu");

    // Make these static so that we can reuse device allocations across frames.
    static buffer_t srcBuf = {0};
    static buffer_t dstBuf = {0};

    if (dst) {
        srcBuf.host = (uint8_t *)src;
        srcBuf.host_dirty = true;
        srcBuf.extent[0] = w;
        srcBuf.extent[1] = h;
        srcBuf.extent[2] = 0;
        srcBuf.extent[3] = 0;
        srcBuf.stride[0] = 1;
        srcBuf.stride[1] = w;
        srcBuf.min[0] = 0;
        srcBuf.min[1] = 0;
        srcBuf.elem_size = 1;

        // NOTE(review): rows of the destination are addressed with a stride of
        // w; buf.stride is not consulted - confirm the window buffer is
        // tightly packed.
        dstBuf.host = dst;
        dstBuf.extent[0] = w;
        dstBuf.extent[1] = h;
        dstBuf.extent[2] = 0;
        dstBuf.extent[3] = 0;
        dstBuf.stride[0] = 1;
        dstBuf.stride[1] = w;
        dstBuf.min[0] = 0;
        dstBuf.min[1] = 0;
        dstBuf.elem_size = 1;

        // Just copy over chrominance untouched
        memcpy(dst + w*h, src + w*h, (w*h)/2);

        int64_t t1 = halide_current_time_ns();
        halide_generated(&srcBuf, &dstBuf);

        // Only copy back when the pipeline left a device allocation behind.
        if (dstBuf.dev) {
            halide_copy_to_host(NULL, &dstBuf);
        }

        int64_t t2 = halide_current_time_ns();
        unsigned elapsed_us = (t2 - t1)/1000;

        // Record this frame in the ring and report the smallest entry.
        times[counter & 15] = elapsed_us;
        counter++;
        unsigned min = times[0];
        for (int i = 1; i < 16; i++) {
            if (times[i] < min) {
                min = times[i];
            }
        }
        LOGD("Time taken: %d (%d)", elapsed_us, min);
    }

    ANativeWindow_unlockAndPost(win);
    ANativeWindow_release(win);

    env->ReleaseByteArrayElements(jSrc, (jbyte *)src, 0);
}
int main(int argc, char **argv) { halide_set_error_handler(&my_halide_error); buffer_t in = {0}, out = {0}; in.host = (uint8_t *)malloc(64*64*4); in.elem_size = 4; in.extent[0] = 64; in.stride[0] = 1; in.extent[1] = 64; in.stride[1] = 64; out.host = (uint8_t *)malloc(64*64*4); out.elem_size = 4; out.extent[0] = 64; out.stride[0] = 1; out.extent[1] = 64; out.stride[1] = 64; // First, a successful run. int result = error_codes(&in, 64, &out); int correct = halide_error_code_success; check(result, correct); // Passing 50 as the second arg violates the call to Func::bound // in the generator result = error_codes(&in, 50, &out); correct = halide_error_code_explicit_bounds_too_small; check(result, correct); // Would read out of bounds on the input in.extent[0] = 50; result = error_codes(&in, 64, &out); correct = halide_error_code_access_out_of_bounds; check(result, correct); in.extent[0] = 64; // Input buffer larger than 2GB in.extent[0] = 10000000; in.extent[1] = 10000000; result = error_codes(&in, 64, &out); correct = halide_error_code_buffer_extents_too_large; check(result, correct); in.extent[0] = 64; in.extent[1] = 64; // Input buffer requires addressing math that would overflow 32 bits. in.stride[1] = 0x7fffffff; result = error_codes(&in, 64, &out); correct = halide_error_code_buffer_allocation_too_large; check(result, correct); in.stride[1] = 64; // stride[0] is constrained to be 1 in.stride[0] = 2; result = error_codes(&in, 64, &out); correct = halide_error_code_constraint_violated; check(result, correct); in.stride[0] = 1; // The second argument is supposed to be between 0 and 64. result = error_codes(&in, -23, &out); correct = halide_error_code_param_too_small; check(result, correct); in.extent[0] = 108; out.extent[0] = 108; result = error_codes(&in, 108, &out); correct = halide_error_code_param_too_large; check(result, correct); in.extent[0] = 64; out.extent[0] = 64; // You can't pass nullptr as a buffer_t argument. 
result = error_codes(nullptr, 64, &out); correct = halide_error_code_buffer_argument_is_null; check(result, correct); printf("Success!\n"); return 0; }
// JNI entry point for CameraPreview.processFrame (orientation-aware variant).
//
// Runs the Halide pipeline `hello` over the first w*h bytes of jSrc (described
// to Halide as a w x h uint8 plane), writing into the locked buffer of the
// window backing `surf`. When orientation >= 180 the source is traversed
// back-to-front via negative strides so the image is rotated by 180 degrees.
// The (w*h)/2 bytes after the first destination plane are filled with 128.
// The elapsed time of the pipeline call and the minimum of a 16-entry ring of
// recent timings are logged.
//
// Parameters:
//   env, obj      - standard JNI arguments (obj is not used).
//   jSrc          - byte array holding the source frame.
//   j_w, j_h      - frame width and height.
//   j_orientation - orientation value; values >= 180 select the flipped read.
//   surf          - Java Surface whose native window receives the output.
//
// Every early-exit path releases whatever has been acquired so far (the pinned
// byte array and the native window reference).
JNIEXPORT void JNICALL Java_com_example_hellohalide_CameraPreview_processFrame(
    JNIEnv *env, jobject obj, jbyteArray jSrc, jint j_w, jint j_h, jint j_orientation, jobject surf) {

    const int w = j_w, h = j_h, orientation = j_orientation;

    halide_start_clock(NULL);
    halide_set_error_handler(handler);

    unsigned char *src = (unsigned char *)env->GetByteArrayElements(jSrc, NULL);
    if (!src) {
        LOGD("src is null\n");
        return;
    }

    LOGD("[output window size] j_w = %d, j_h = %d", j_w, j_h);
    LOGD("[src array length] jSrc.length = %d", env->GetArrayLength(jSrc));

    // The returned window reference is already acquired; it is released once
    // on every path below.
    ANativeWindow *win = ANativeWindow_fromSurface(env, surf);
    if (!win) {
        LOGD("ANativeWindow_fromSurface returned null\n");
        // src was only read, so there is nothing to copy back.
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    // State kept across frames: one-time setup flag and timing ring buffer.
    static bool first_call = true;
    static unsigned counter = 0;
    static unsigned times[16];
    if (first_call) {
        LOGD("According to Halide, host system has %d cpus\n", halide_host_cpu_count());
        LOGD("Resetting buffer format");
        ANativeWindow_setBuffersGeometry(win, w, h, 0);
        first_call = false;
        for (int t = 0; t < 16; t++) {
            times[t] = 0;
        }
    }

    ANativeWindow_Buffer buf;
    if (int err = ANativeWindow_lock(win, &buf, NULL)) {
        LOGD("ANativeWindow_lock failed with error code %d\n", err);
        // Do not leak the window reference or the pinned array on failure.
        ANativeWindow_release(win);
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    uint8_t *dst = (uint8_t *)buf.bits;

    // If we're using opencl, use the gpu backend for it.
#if COMPILING_FOR_OPENCL
    halide_opencl_set_device_type("gpu");
#endif

    // Make these static so that we can reuse device allocations across frames.
    static halide_buffer_t srcBuf = {0};
    static halide_dimension_t srcDim[2];
    static halide_buffer_t dstBuf = {0};
    static halide_dimension_t dstDim[2];

    if (dst) {
        srcBuf.host = (uint8_t *)src;
        srcBuf.set_host_dirty();
        srcBuf.dim = srcDim;
        srcBuf.dim[0].min = 0;
        srcBuf.dim[0].extent = w;
        srcBuf.dim[0].stride = 1;
        srcBuf.dim[1].min = 0;
        srcBuf.dim[1].extent = h;
        srcBuf.dim[1].stride = w;
        srcBuf.type = halide_type_of<uint8_t>();

        if (orientation >= 180) {
            // Camera sensor is probably upside down (e.g. Nexus 5x).
            // Start at the last byte of the plane and walk backwards in both
            // dimensions.
            srcBuf.host += w*h-1;
            srcBuf.dim[0].stride = -1;
            srcBuf.dim[1].stride = -w;
        }

        // NOTE(review): rows of the destination are addressed with a stride of
        // w; buf.stride is not consulted - confirm the window buffer is
        // tightly packed.
        dstBuf.host = dst;
        dstBuf.dim = dstDim;
        dstBuf.dim[0].min = 0;
        dstBuf.dim[0].extent = w;
        dstBuf.dim[0].stride = 1;
        dstBuf.dim[1].min = 0;
        dstBuf.dim[1].extent = h;
        dstBuf.dim[1].stride = w;
        dstBuf.type = halide_type_of<uint8_t>();

        // Just set chroma to gray.
        memset(dst + w*h, 128, (w*h)/2);

        int64_t t1 = halide_current_time_ns();
        hello(&srcBuf, &dstBuf);

        halide_copy_to_host(NULL, &dstBuf);

        int64_t t2 = halide_current_time_ns();
        unsigned elapsed_us = (t2 - t1)/1000;

        // Record this frame in the ring and report the smallest entry.
        times[counter & 15] = elapsed_us;
        counter++;
        unsigned min = times[0];
        for (int i = 1; i < 16; i++) {
            if (times[i] < min) {
                min = times[i];
            }
        }
        LOGD("Time taken: %d (%d)", elapsed_us, min);
    }

    ANativeWindow_unlockAndPost(win);
    ANativeWindow_release(win);

    env->ReleaseByteArrayElements(jSrc, (jbyte *)src, 0);
}
// JNI entry point for CameraPreview.processFrame (fixed 640x360 output variant).
//
// Runs the Halide pipeline `halide` from jSrc into the locked buffer of the
// window backing `surf`, logs the elapsed time of the call and the minimum of a
// 16-entry ring of recent timings, then copies two 320x180 blocks from the
// source into the destination after the first 640x360 plane.
//
// Parameters:
//   env, obj - standard JNI arguments (obj is not used).
//   jSrc     - byte array holding the source frame.
//   surf     - Java Surface whose native window receives the output.
//
// Every early-exit path releases whatever has been acquired so far (the pinned
// byte array and the native window reference).
JNIEXPORT void JNICALL Java_com_example_hellohalide_CameraPreview_processFrame(
    JNIEnv *env, jobject obj, jbyteArray jSrc, jobject surf) {

    halide_set_error_handler(handler);

    unsigned char *src = (unsigned char *)env->GetByteArrayElements(jSrc, NULL);
    if (!src) {
        LOGD("src is null\n");
        return;
    }

    // ANativeWindow_fromSurface returns a reference that is already acquired,
    // so no additional ANativeWindow_acquire is needed; the single
    // ANativeWindow_release below balances it.
    ANativeWindow *win = ANativeWindow_fromSurface(env, surf);
    if (!win) {
        LOGD("ANativeWindow_fromSurface returned null\n");
        // src was only read, so there is nothing to copy back.
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    // State kept across frames: one-time setup flag and timing ring buffer.
    static bool first_call = true;
    static unsigned counter = 0;
    static unsigned times[16];
    if (first_call) {
        LOGD("Resetting buffer format");
        ANativeWindow_setBuffersGeometry(win, 640, 360, 0);
        first_call = false;
        for (int t = 0; t < 16; t++) {
            times[t] = 0;
        }
    }

    ANativeWindow_Buffer buf;
    ARect rect = {0, 0, 640, 360};
    if (int err = ANativeWindow_lock(win, &buf, &rect)) {
        LOGD("ANativeWindow_lock failed with error code %d\n", err);
        // buf is not valid after a failed lock; release and bail out.
        ANativeWindow_release(win);
        env->ReleaseByteArrayElements(jSrc, (jbyte *)src, JNI_ABORT);
        return;
    }

    uint8_t *dst = (uint8_t *)buf.bits;

    buffer_t srcBuf = {0}, dstBuf = {0};

    // The source is described with a one-element border around the 640x360
    // output region (min = -1, extent = output + 2) and a row stride of 640.
    srcBuf.host = (uint8_t *)src;
    srcBuf.extent[0] = 642;
    srcBuf.extent[1] = 362;
    srcBuf.extent[2] = 1;
    srcBuf.extent[3] = 1;
    srcBuf.stride[0] = 1;
    srcBuf.stride[1] = 640;
    srcBuf.min[0] = -1;
    srcBuf.min[1] = -1;
    srcBuf.elem_size = 1;

    dstBuf.host = dst;
    dstBuf.extent[0] = 640;
    dstBuf.extent[1] = 360;
    dstBuf.extent[2] = 1;
    dstBuf.extent[3] = 1;
    dstBuf.stride[0] = 1;
    dstBuf.stride[1] = 640;
    dstBuf.min[0] = 0;
    dstBuf.min[1] = 0;
    dstBuf.elem_size = 1;

    timeval t1, t2;
    gettimeofday(&t1, NULL);
    halide(&srcBuf, &dstBuf);
    gettimeofday(&t2, NULL);

    // Elapsed wall-clock time in microseconds.
    unsigned elapsed = (t2.tv_sec - t1.tv_sec)*1000000 + (t2.tv_usec - t1.tv_usec);

    // Record this frame in the ring and report the smallest entry.
    times[counter & 15] = elapsed;
    counter++;
    unsigned min = times[0];
    for (int i = 1; i < 16; i++) {
        if (times[i] < min) {
            min = times[i];
        }
    }
    LOGD("Time taken: %d (%d)", elapsed, min);

    // Just copy over chrominance untouched
    // NOTE(review): the source offsets use 640*480 and 320*240 while the
    // destination uses 640*360 and 320*180 - presumably the source frame is
    // 640x480; confirm against the caller.
    memcpy(dst + 640*360, src + 640*480, 320*180);
    memcpy(dst + 640*360 + 320*180, src + 640*480 + 320*240, 320*180);

    ANativeWindow_unlockAndPost(win);
    ANativeWindow_release(win);

    env->ReleaseByteArrayElements(jSrc, (jbyte *)src, 0);
}