/*! \brief Input validator for the Threshold kernel.
 * Parameter 0 must be a VX_DF_IMAGE_U8 image; parameter 1 must be a
 * vx_threshold of type BINARY or RANGE.
 * \return VX_SUCCESS on a valid parameter, a specific VX_ERROR_* otherwise.
 */
static vx_status VX_CALLBACK vxThresholdInputValidator(vx_node node, vx_uint32 index)
{
    vx_status status = VX_ERROR_INVALID_PARAMETERS;
    if (index == 0)
    {
        vx_parameter param = vxGetParameterByIndex(node, index);
        if (param)
        {
            vx_image input = 0;
            vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &input, sizeof(input));
            if (input)
            {
                vx_df_image format = 0;
                vxQueryImage(input, VX_IMAGE_ATTRIBUTE_FORMAT, &format, sizeof(format));
                if (format == VX_DF_IMAGE_U8)
                {
                    status = VX_SUCCESS;
                }
                else
                {
                    status = VX_ERROR_INVALID_FORMAT;
                }
                vxReleaseImage(&input);
            }
            /* fixed: was the mojibake token "¶m" (corrupted "&param") */
            vxReleaseParameter(&param);
        }
    }
    else if (index == 1)
    {
        vx_parameter param = vxGetParameterByIndex(node, index);
        if (param)
        {
            vx_threshold threshold = 0;
            vxQueryParameter(param, VX_PARAMETER_ATTRIBUTE_REF, &threshold, sizeof(threshold));
            if (threshold)
            {
                vx_enum type = 0;
                vxQueryThreshold(threshold, VX_THRESHOLD_ATTRIBUTE_TYPE, &type, sizeof(type));
                if ((type == VX_THRESHOLD_TYPE_BINARY) ||
                    (type == VX_THRESHOLD_TYPE_RANGE))
                {
                    status = VX_SUCCESS;
                }
                else
                {
                    status = VX_ERROR_INVALID_TYPE;
                }
                vxReleaseThreshold(&threshold);
            }
            /* fixed: was the mojibake token "¶m" (corrupted "&param") */
            vxReleaseParameter(&param);
        }
    }
    return status;
}
/*! \brief Hysteresis edge tracing for Canny.
 * Pixels with gradient norm above the upper threshold are marked YES and
 * seeded onto a stack; MAYBE pixels (between the thresholds) are promoted
 * to YES only when reachable from a YES pixel via the 8-neighbourhood
 * offsets in dir_offsets. Remaining MAYBE pixels are cleared to NO.
 * \param norm      input U16 gradient-magnitude image.
 * \param threshold RANGE threshold providing lower/upper bounds.
 * \param output    U8 edge map written in place.
 * \return VX_SUCCESS, or an error from patch access / allocation.
 */
static vx_status vxEdgeTrace(vx_image norm, vx_threshold threshold, vx_image output)
{
    vx_rectangle_t rect;
    vx_imagepatch_addressing_t norm_addr, output_addr;
    void *norm_base = NULL, *output_base = NULL;
    vx_uint32 y = 0, x = 0;
    vx_int32 lower = 0, upper = 0;
    vx_status status = VX_SUCCESS;

    vxQueryThreshold(threshold, VX_THRESHOLD_ATTRIBUTE_THRESHOLD_LOWER, &lower, sizeof(lower));
    vxQueryThreshold(threshold, VX_THRESHOLD_ATTRIBUTE_THRESHOLD_UPPER, &upper, sizeof(upper));
    vxGetValidRegionImage(norm, &rect);
    status |= vxAccessImagePatch(norm, &rect, 0, &norm_addr, &norm_base, VX_READ_ONLY);
    status |= vxAccessImagePatch(output, &rect, 0, &output_addr, &output_base, VX_WRITE_ONLY);
    if (status == VX_SUCCESS)
    {
        const vx_uint8 NO = 0, MAYBE = 127, YES = 255;

        /* Initially we add all YES pixels to the stack. Later we only add
         * MAYBE pixels to it, and we reset their state to YES afterwards;
         * so we can never add the same pixel more than once. That means
         * that the stack size is bounded by the image size. */
        vx_uint32 (*tracing_stack)[2] =
            malloc(output_addr.dim_y * output_addr.dim_x * sizeof *tracing_stack);
        if (tracing_stack == NULL)
        {
            /* fixed: the allocation result was previously used unchecked */
            status = VX_ERROR_NO_MEMORY;
        }
        else
        {
            vx_uint32 (*stack_top)[2] = tracing_stack;

            /* Classify every pixel and seed the stack with definite edges. */
            for (y = 0; y < norm_addr.dim_y; y++)
                for (x = 0; x < norm_addr.dim_x; x++)
                {
                    vx_uint16 *norm_ptr = vxFormatImagePatchAddress2d(norm_base, x, y, &norm_addr);
                    vx_uint8 *output_ptr = vxFormatImagePatchAddress2d(output_base, x, y, &output_addr);

                    if (*norm_ptr > upper)
                    {
                        *output_ptr = YES;
                        (*stack_top)[0] = x;
                        (*stack_top)[1] = y;
                        ++stack_top;
                    }
                    else if (*norm_ptr <= lower)
                    {
                        *output_ptr = NO;
                    }
                    else
                    {
                        *output_ptr = MAYBE;
                    }
                }

            /* Flood-fill from the seeds, promoting reachable MAYBE pixels. */
            while (stack_top != tracing_stack)
            {
                int i;
                --stack_top;
                x = (*stack_top)[0];
                y = (*stack_top)[1];

                for (i = 0; i < dimof(dir_offsets); ++i)
                {
                    const struct offset_t offset = dir_offsets[i];
                    vx_uint32 new_x, new_y;
                    vx_uint8 *output_ptr;

                    /* Stay inside the patch on every border. */
                    if (x == 0 && offset.x < 0) continue;
                    if (x == output_addr.dim_x - 1 && offset.x > 0) continue;
                    if (y == 0 && offset.y < 0) continue;
                    if (y == output_addr.dim_y - 1 && offset.y > 0) continue;

                    new_x = x + offset.x;
                    new_y = y + offset.y;

                    output_ptr = vxFormatImagePatchAddress2d(output_base, new_x, new_y, &output_addr);
                    if (*output_ptr != MAYBE) continue;
                    *output_ptr = YES;

                    (*stack_top)[0] = new_x;
                    (*stack_top)[1] = new_y;
                    ++stack_top;
                }
            }
            free(tracing_stack);

            /* Unreached MAYBE pixels are not edges. */
            for (y = 0; y < output_addr.dim_y; y++)
                for (x = 0; x < output_addr.dim_x; x++)
                {
                    vx_uint8 *output_ptr = vxFormatImagePatchAddress2d(output_base, x, y, &output_addr);
                    if (*output_ptr == MAYBE) *output_ptr = NO;
                }
        }

        status |= vxCommitImagePatch(norm, 0, 0, &norm_addr, norm_base);
        status |= vxCommitImagePatch(output, &rect, 0, &output_addr, output_base);
    }
    return status;
}
/*! \brief Calls an OpenCL kernel from OpenVX Graph. * Steps: * \arg Find the target * \arg Get the vxcl context * \arg Find the kernel (to get cl kernel information) * \arg for each input parameter that is an object, enqueue write * \arg wait for finish * \arg for each parameter, SetKernelArg * \arg call kernel * \arg wait for finish * \arg for each output parameter that is an object, enqueue read * \arg wait for finish * \note This implementation will attempt to use the External API as much as possible, * but will cast to internal representation when needed (due to lack of API or * need for secret information). This is not an optimal OpenCL invocation. */ vx_status vxclCallOpenCLKernel(vx_node node, const vx_reference *parameters, vx_uint32 num) { vx_status status = VX_FAILURE; vx_context context = node->base.context; vx_target target = (vx_target_t *)&node->base.context->targets[node->affinity]; vx_cl_kernel_description_t *vxclk = vxclFindKernel(node->kernel->enumeration); vx_uint32 pidx, pln, didx, plidx, argidx; cl_int err = 0; size_t off_dim[3] = {0,0,0}; size_t work_dim[3]; //size_t local_dim[3]; cl_event writeEvents[VX_INT_MAX_PARAMS]; cl_event readEvents[VX_INT_MAX_PARAMS]; cl_int we = 0, re = 0; vxSemWait(&target->base.lock); // determine which platform to use plidx = 0; // determine which device to use didx = 0; /* for each input/bi data object, enqueue it and set the kernel parameters */ for (argidx = 0, pidx = 0; pidx < num; pidx++) { vx_reference ref = node->parameters[pidx]; vx_enum dir = node->kernel->signature.directions[pidx]; vx_enum type = node->kernel->signature.types[pidx]; vx_memory_t *memory = NULL; switch (type) { case VX_TYPE_ARRAY: memory = &((vx_array)ref)->memory; break; case VX_TYPE_CONVOLUTION: memory = &((vx_convolution)ref)->base.memory; break; case VX_TYPE_DISTRIBUTION: memory = &((vx_distribution)ref)->memory; break; case VX_TYPE_IMAGE: memory = &((vx_image)ref)->memory; break; case VX_TYPE_LUT: memory = &((vx_lut_t*)ref)->memory; 
break; case VX_TYPE_MATRIX: memory = &((vx_matrix)ref)->memory; break; //case VX_TYPE_PYRAMID: // break; case VX_TYPE_REMAP: memory = &((vx_remap)ref)->memory; break; //case VX_TYPE_SCALAR: //case VX_TYPE_THRESHOLD: // break; } if (memory) { for (pln = 0; pln < memory->nptrs; pln++) { if (memory->cl_type == CL_MEM_OBJECT_BUFFER) { if (type == VX_TYPE_IMAGE) { /* set the work dimensions */ work_dim[0] = memory->dims[pln][VX_DIM_X]; work_dim[1] = memory->dims[pln][VX_DIM_Y]; // width, height, stride_x, stride_y err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->dims[pln][VX_DIM_X]); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->dims[pln][VX_DIM_Y]); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->strides[pln][VX_DIM_X]); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->strides[pln][VX_DIM_Y]); VX_PRINT(VX_ZONE_INFO, "Setting vx_image as Buffer with 4 parameters\n"); } else if (type == VX_TYPE_ARRAY || type == VX_TYPE_LUT) { vx_array arr = (vx_array)ref; // sizeof item, active count, capacity err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->item_size); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->num_items); // this is output? 
err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->capacity); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &arr->memory.strides[VX_DIM_X]); VX_PRINT(VX_ZONE_INFO, "Setting vx_buffer as Buffer with 4 parameters\n"); } else if (type == VX_TYPE_MATRIX) { vx_matrix mat = (vx_matrix)ref; // columns, rows err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&mat->columns); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&mat->rows); VX_PRINT(VX_ZONE_INFO, "Setting vx_matrix as Buffer with 2 parameters\n"); } else if (type == VX_TYPE_DISTRIBUTION) { vx_distribution dist = (vx_distribution)ref; // num, range, offset, winsize vx_uint32 range = dist->memory.dims[0][VX_DIM_X] * dist->window_x; err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->memory.dims[VX_DIM_X]); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&range); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->offset_x); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->window_x); } else if (type == VX_TYPE_CONVOLUTION) { vx_convolution conv = (vx_convolution)ref; // columns, rows, scale err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->base.columns); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->base.rows); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->scale); } err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(cl_mem), &memory->hdls[pln]); CL_ERROR_MSG(err, "clSetKernelArg"); if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL) { err = clEnqueueWriteBuffer(context->queues[plidx][didx], memory->hdls[pln], CL_TRUE, 0, vxComputeMemorySize(memory, pln), memory->ptrs[pln], 0, 
NULL, &ref->event); } } else if (memory->cl_type == CL_MEM_OBJECT_IMAGE2D) { vx_rectangle_t rect = {0}; vx_image image = (vx_image)ref; vxGetValidRegionImage(image, &rect); size_t origin[3] = {rect.start_x, rect.start_y, 0}; size_t region[3] = {rect.end_x-rect.start_x, rect.end_y-rect.start_y, 1}; /* set the work dimensions */ work_dim[0] = rect.end_x-rect.start_x; work_dim[1] = rect.end_y-rect.start_y; VX_PRINT(VX_ZONE_INFO, "Setting vx_image as image2d_t wd={%zu,%zu} arg:%u\n",work_dim[0], work_dim[1], argidx); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(cl_mem), &memory->hdls[pln]); CL_ERROR_MSG(err, "clSetKernelArg"); if (err != CL_SUCCESS) { VX_PRINT(VX_ZONE_ERROR, "Error Calling Kernel %s, parameter %u\n", node->kernel->name, pidx); } if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL) { err = clEnqueueWriteImage(context->queues[plidx][didx], memory->hdls[pln], CL_TRUE, origin, region, memory->strides[pln][VX_DIM_Y], 0, memory->ptrs[pln], 0, NULL, &ref->event); CL_ERROR_MSG(err, "clEnqueueWriteImage"); } } } } else { if (type == VX_TYPE_SCALAR) { vx_value_t value; // largest platform atomic vx_size size = 0ul; vx_scalar sc = (vx_scalar)ref; vx_enum stype = VX_TYPE_INVALID; vxReadScalarValue(sc, &value); vxQueryScalar(sc, VX_SCALAR_ATTRIBUTE_TYPE, &stype, sizeof(stype)); size = vxSizeOfType(stype); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, size, &value); } else if (type == VX_TYPE_THRESHOLD) { vx_enum ttype = 0; vx_threshold th = (vx_threshold)ref; vxQueryThreshold(th, VX_THRESHOLD_ATTRIBUTE_TYPE, &ttype, sizeof(ttype)); if (ttype == VX_THRESHOLD_TYPE_BINARY) { err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->value); } else if (ttype == VX_THRESHOLD_TYPE_RANGE) { err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->lower); err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->upper); } } } } we = 0; for (pidx = 0; pidx < num; pidx++) { vx_reference ref = 
node->parameters[pidx]; vx_enum dir = node->kernel->signature.directions[pidx]; if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL) { memcpy(&writeEvents[we++],&ref->event, sizeof(cl_event)); } } //local_dim[0] = 1; //local_dim[1] = 1; err = clEnqueueNDRangeKernel(context->queues[plidx][didx], vxclk->kernels[plidx], 2, off_dim, work_dim, NULL, we, writeEvents, &node->base.event); CL_ERROR_MSG(err, "clEnqueueNDRangeKernel"); /* enqueue a read on all output data */ for (pidx = 0; pidx < num; pidx++) { vx_reference ref = node->parameters[pidx]; vx_enum dir = node->kernel->signature.directions[pidx]; vx_enum type = node->kernel->signature.types[pidx]; if (dir == VX_OUTPUT || dir == VX_BIDIRECTIONAL) { vx_memory_t *memory = NULL; switch (type) { case VX_TYPE_ARRAY: memory = &((vx_array)ref)->memory; break; case VX_TYPE_CONVOLUTION: memory = &((vx_convolution)ref)->base.memory; break; case VX_TYPE_DISTRIBUTION: memory = &((vx_distribution)ref)->memory; break; case VX_TYPE_IMAGE: memory = &((vx_image)ref)->memory; break; case VX_TYPE_LUT: memory = &((vx_lut_t*)ref)->memory; break; case VX_TYPE_MATRIX: memory = &((vx_matrix)ref)->memory; break; //case VX_TYPE_PYRAMID: // break; case VX_TYPE_REMAP: memory = &((vx_remap)ref)->memory; break; //case VX_TYPE_SCALAR: //case VX_TYPE_THRESHOLD: // break; } if (memory) { for (pln = 0; pln < memory->nptrs; pln++) { if (memory->cl_type == CL_MEM_OBJECT_BUFFER) { err = clEnqueueReadBuffer(context->queues[plidx][didx], memory->hdls[pln], CL_TRUE, 0, vxComputeMemorySize(memory, pln), memory->ptrs[pln], 1, &node->base.event, &ref->event); CL_ERROR_MSG(err, "clEnqueueReadBuffer"); } else if (memory->cl_type == CL_MEM_OBJECT_IMAGE2D) { vx_rectangle_t rect = {0}; vx_image image = (vx_image)ref; vxGetValidRegionImage(image, &rect); size_t origin[3] = {rect.start_x, rect.start_y, 0}; size_t region[3] = {rect.end_x-rect.start_x, rect.end_y-rect.start_y, 1}; /* set the work dimensions */ work_dim[0] = rect.end_x-rect.start_x; work_dim[1] = 
rect.end_y-rect.start_y; err = clEnqueueReadImage(context->queues[plidx][didx], memory->hdls[pln], CL_TRUE, origin, region, memory->strides[pln][VX_DIM_Y], 0, memory->ptrs[pln], 1, &node->base.event, &ref->event); CL_ERROR_MSG(err, "clEnqueueReadImage"); VX_PRINT(VX_ZONE_INFO, "Reading Image wd={%zu,%zu}\n", work_dim[0], work_dim[1]); } } } } } re = 0; for (pidx = 0; pidx < num; pidx++) { vx_reference ref = node->parameters[pidx]; vx_enum dir = node->kernel->signature.directions[pidx]; if (dir == VX_OUTPUT || dir == VX_BIDIRECTIONAL) { memcpy(&readEvents[re++],&ref->event, sizeof(cl_event)); } } err = clFlush(context->queues[plidx][didx]); CL_ERROR_MSG(err, "Flush"); VX_PRINT(VX_ZONE_TARGET, "Waiting for read events!\n"); clWaitForEvents(re, readEvents); if (err == CL_SUCCESS) status = VX_SUCCESS; //exit: VX_PRINT(VX_ZONE_API, "%s exiting %d\n", __FUNCTION__, status); vxSemPost(&target->base.lock); return status; }