//! [node] vx_node vxXYZNode(vx_graph graph, vx_image input, vx_uint32 value, vx_image output, vx_array temp) { vx_uint32 i; vx_node node = 0; vx_context context = vxGetContext((vx_reference)graph); vx_status status = vxLoadKernels(context, "xyz"); if (status == VX_SUCCESS) { //! [xyz node] vx_kernel kernel = vxGetKernelByName(context, VX_KERNEL_NAME_KHR_XYZ); if (kernel) { node = vxCreateGenericNode(graph, kernel); if (vxGetStatus((vx_reference)node) == VX_SUCCESS) { vx_status statuses[4]; vx_scalar scalar = vxCreateScalar(context, VX_TYPE_INT32, &value); statuses[0] = vxSetParameterByIndex(node, 0, (vx_reference)input); statuses[1] = vxSetParameterByIndex(node, 1, (vx_reference)scalar); statuses[2] = vxSetParameterByIndex(node, 2, (vx_reference)output); statuses[3] = vxSetParameterByIndex(node, 3, (vx_reference)temp); vxReleaseScalar(&scalar); for (i = 0; i < dimof(statuses); i++) { if (statuses[i] != VX_SUCCESS) { status = VX_ERROR_INVALID_PARAMETERS; vxReleaseNode(&node); vxReleaseKernel(&kernel); node = 0; kernel = 0; break; } } } else { vxReleaseKernel(&kernel); } } else { vxUnloadKernels(context, "xyz"); } //! [xyz node] } return node; }
/*! \brief The destructor to remove a user loaded module from OpenVX.
 * \param [in] context The handle to the implementation context.
 * \return A \ref vx_status_e enumeration. Returns errors if some or all kernels were not added
 * correctly.
 * \note This follows the function pointer definition of a \ref vx_unpublish_kernels_f
 * and uses the predefined name for the entry point, "vxUnpublishKernels".
 * \ingroup group_example_kernel
 */
/*VX_API_ENTRY*/ vx_status VX_API_CALL vxUnpublishKernels(vx_context context)
{
    vx_status status = VX_FAILURE;
    vx_uint32 idx;
    for (idx = 0; idx < num_kernels; idx++)
    {
        vx_kernel found = vxGetKernelByName(context, kernels[idx]->name);
        if (!found)
        {
            vxAddLogEntry((vx_reference)context, status, "Failed to get added kernel %s\n", idx, kernels[idx]->name);
            continue;
        }
        /* Release the lookup's reference through a scratch copy so the original
         * handle remains usable for the remove call below. */
        vx_kernel scratch = found;
        status = vxReleaseKernel(&scratch);
        if (status != VX_SUCCESS)
        {
            vxAddLogEntry((vx_reference)context, status, "Failed to release kernel[%u]=%s\n", idx, kernels[idx]->name);
        }
        else
        {
            scratch = found;
            status = vxRemoveKernel(scratch);
            if (status != VX_SUCCESS)
            {
                vxAddLogEntry((vx_reference)context, status, "Failed to remove kernel[%u]=%s\n", idx, kernels[idx]->name);
            }
        }
    }
    return status;
}
//////// // The node creation interface for the "app.userkernels.tensor_cos" kernel. // This user kernel example expects parameters in the following order: // parameter #0 -- input tensor of format VX_TYPE_INT16 // parameter #1 -- output tensor of format VX_TYPE_INT16 // // TODO STEP 01:******** // 1. Use vxGetKernelByEnum API to get a kernel object from USER_KERNEL_TENSOR_COS. // Note that you need to use vxGetContext API to get the context from a graph object. // 2. Use vxCreateGenericNode API to create a node from the kernel object. // 3. Use vxSetParameterByIndex API to set node arguments. // 4. Release the kernel object that are not needed any more. // 5. Use ERROR_CHECK_OBJECT and ERROR_CHECK_STATUS macros for error detection. vx_node userTensorCosNode( vx_graph graph, vx_tensor input, vx_tensor output ) { vx_context context = vxGetContext( ( vx_reference ) graph ); vx_kernel kernel = vxGetKernelByEnum( context, USER_KERNEL_TENSOR_COS ); ERROR_CHECK_OBJECT( kernel ); vx_node node = vxCreateGenericNode( graph, kernel ); ERROR_CHECK_OBJECT( node ); // ERROR_CHECK_STATUS( vxSetParameterByIndex( node, 0, ( vx_reference ) /* Fill in parameter */ ) ); // ERROR_CHECK_STATUS( vxSetParameterByIndex( node, 1, ( vx_reference ) /* Fill in parameter */ ) ); ERROR_CHECK_STATUS( vxReleaseKernel( &kernel ) ); return node; }
// Registers the ROI pooling layer kernel with the context: callbacks,
// parameter signature, finalize, and release of the local kernel handle.
vx_status publishROIPoolingLayer(vx_context context)
{
    vx_uint32 i;
    // Parameter signature: three inputs (tensor, tensor, scalar) and one output tensor.
    static const struct { vx_enum dir; vx_enum type; } signature[4] = {
        { VX_INPUT,  VX_TYPE_TENSOR },
        { VX_INPUT,  VX_TYPE_TENSOR },
        { VX_INPUT,  VX_TYPE_SCALAR },
        { VX_OUTPUT, VX_TYPE_TENSOR },
    };

    // Add the kernel to the context with its processing and lifecycle callbacks.
    vx_kernel kernel = vxAddUserKernel(context, "org.khronos.nn_extension.roi_pooling_layer",
                                       VX_KERNEL_ROI_POOLING_LAYER, processROIPoolingLayer, 4,
                                       validateROIPoolingLayer, initializeROIPoolingLayer,
                                       uninitializeROIPoolingLayer);
    ERROR_CHECK_OBJECT(kernel);

    for (i = 0; i < 4; i++)
    {
        ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, i, signature[i].dir,
                                                  signature[i].type, VX_PARAMETER_STATE_REQUIRED));
    }

    // Seal the kernel signature, then drop the local reference.
    ERROR_CHECK_STATUS(vxFinalizeKernel(kernel));
    ERROR_CHECK_STATUS(vxReleaseKernel(&kernel));
    return VX_SUCCESS;
}
/*!
 * \brief Creates a node from a kernel enum and an ordered parameter list.
 * \param [in] graph      The graph in which to create the node.
 * \param [in] kernelenum The kernel enumeration to look up.
 * \param [in] params     Array of parameter items (direction + reference), in index order.
 * \param [in] num        Number of entries in \p params.
 * \return The created node, or 0 on failure (details are written to the log).
 */
vx_node vxCreateNodeByStructure(vx_graph graph, vx_enum kernelenum, vx_parameter_item_t *params, vx_uint32 num)
{
    vx_status status = VX_SUCCESS;
    vx_node node = 0;
    /* Fix: vxGetContext and vxAddLogEntry take a vx_reference, so the graph
     * handle must be cast explicitly (as done elsewhere in this file). */
    vx_context context = vxGetContext((vx_reference)graph);
    vx_kernel kernel = vxGetKernelByEnum(context, kernelenum);
    if (kernel)
    {
        node = vxCreateNode(graph, kernel);
        if (node)
        {
            vx_uint32 p = 0;
            for (p = 0; p < num; p++)
            {
                status = vxSetParameterByIndex(node, p, params[p].direction, params[p].reference);
                if (status != VX_SUCCESS)
                {
                    vxAddLogEntry((vx_reference)graph, status, "Kernel %d Parameter %u is invalid.\n", kernelenum, p);
                    vxReleaseNode(&node);
                    node = 0;
                    break;
                }
            }
        }
        else
        {
            vxAddLogEntry((vx_reference)graph, VX_ERROR_INVALID_PARAMETERS, "Failed to create node with kernel enum %d\n", kernelenum);
            status = VX_ERROR_NO_MEMORY;
        }
        /* The node (if created) holds its own kernel reference. */
        vxReleaseKernel(&kernel);
    }
    else
    {
        vxAddLogEntry((vx_reference)graph, VX_ERROR_INVALID_PARAMETERS, "failed to retrieve kernel enum %d\n", kernelenum);
        status = VX_ERROR_NOT_SUPPORTED;
    }
    return node;
}
//////// // User kernels needs to be registered with every OpenVX context before use in a graph. // // TODO:******** // 1. Use vxAddUserKernel API to register "app.userkernels.tensor_cos" with // kernel enumeration = USER_KERNEL_TENSOR_COS, numParams = 2, and // all of the user kernel callback functions you implemented above. // 2. Use vxAddParameterToKernel API to specify direction, data_type, and // state of all 2 parameters to the kernel. Look into the comments of // userTensorCosNode function (above) to details about the order of // kernel parameters and their types. // 3. Use vxFinalizeKernel API to make the kernel ready to use in a graph. // Note that the kernel object is still valid after this call. // So you need to call vxReleaseKernel before returning from this function. vx_status registerUserKernel( vx_context context ) { vx_kernel kernel = vxAddUserKernel( context, "app.userkernels.tensor_cos", USER_KERNEL_TENSOR_COS, tensor_cos_host_side_function, 2, // numParams tensor_cos_validator, NULL, NULL ); ERROR_CHECK_OBJECT( kernel ); ERROR_CHECK_STATUS( vxAddParameterToKernel( kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED ) ); // input ERROR_CHECK_STATUS( vxAddParameterToKernel( kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED ) ); // output ERROR_CHECK_STATUS( vxFinalizeKernel( kernel ) ); ERROR_CHECK_STATUS( vxReleaseKernel( &kernel ) ); vxAddLogEntry( ( vx_reference ) context, VX_SUCCESS, "OK: registered user kernel app.userkernels.tensor_cos\n" ); return VX_SUCCESS; }
// Registers the softmax layer kernel: callbacks, OpenCL buffer access,
// parameter signature, finalize, and release of the local kernel handle.
vx_status publishSoftmaxLayer(vx_context context)
{
    vx_kernel kernel = vxAddUserKernel(context, "org.khronos.nn_extension.softmax_layer",
                                       VX_KERNEL_SOFTMAX_LAYER, processSoftmaxLayer, 2,
                                       validateSoftmaxLayer, initializeSoftmaxLayer,
                                       uninitializeSoftmaxLayer);
    ERROR_CHECK_OBJECT(kernel);

    // The kernel_f callback consumes OpenCL buffers rather than host-accessible
    // buffers, so opt in to OpenCL buffer access.
    vx_bool enableBufferAccess = vx_true_e;
    ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_BUFFER_ACCESS_ENABLE,
                                            &enableBufferAccess, sizeof(enableBufferAccess)));

    // Parameter #0: input tensor; parameter #1: output tensor.
    ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 0, VX_INPUT,  VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
    ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));

    // Seal the signature, then drop the local reference.
    ERROR_CHECK_STATUS(vxFinalizeKernel(kernel));
    ERROR_CHECK_STATUS(vxReleaseKernel(&kernel));
    return VX_SUCCESS;
}
//! \brief The kernel publisher. vx_status publishArgmaxLayer(vx_context context) { vx_kernel kernel = vxAddUserKernel(context, "com.amd.nn_extension.argmax_layer", VX_KERNEL_ARGMAX_LAYER_AMD, host_kernel, 2, validateKernel, nullptr, nullptr); ERROR_CHECK_OBJECT(kernel); amd_kernel_query_target_support_f query_target_support_f = query_target_support; amd_kernel_opencl_codegen_callback_f opencl_codegen_callback_f = opencl_codegen; ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_CODEGEN_CALLBACK, &opencl_codegen_callback_f, sizeof(opencl_codegen_callback_f))); // set kernel parameters. ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_REFERENCE, VX_PARAMETER_STATE_REQUIRED)); // finalize and release kernel object. ERROR_CHECK_STATUS(vxFinalizeKernel(kernel)); ERROR_CHECK_STATUS(vxReleaseKernel(&kernel)); return VX_SUCCESS; }
// Registers the tensor-add kernel: callbacks, OpenCL buffer access,
// parameter signature, finalize, and release of the local kernel handle.
vx_status publishTensorAdd(vx_context context)
{
    vx_uint32 i;
    // Parameter signature: two input tensors, an input scalar (overflow policy),
    // and an output tensor.
    static const struct { vx_enum dir; vx_enum type; } signature[4] = {
        { VX_INPUT,  VX_TYPE_TENSOR },
        { VX_INPUT,  VX_TYPE_TENSOR },
        { VX_INPUT,  VX_TYPE_SCALAR },
        { VX_OUTPUT, VX_TYPE_TENSOR },
    };

    vx_kernel kernel = vxAddUserKernel(context, "org.khronos.openvx.tensor_add",
                                       VX_KERNEL_TENSOR_ADD, processTensorAddition, 4,
                                       validateTensorAddition, initializeTensorAddition,
                                       uninitializeTensorAddition);
    ERROR_CHECK_OBJECT(kernel);

    // The kernel_f callback consumes OpenCL buffers rather than host-accessible
    // buffers, so opt in to OpenCL buffer access.
    vx_bool enableBufferAccess = vx_true_e;
    ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_BUFFER_ACCESS_ENABLE,
                                            &enableBufferAccess, sizeof(enableBufferAccess)));

    for (i = 0; i < 4; i++)
    {
        ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, i, signature[i].dir,
                                                  signature[i].type, VX_PARAMETER_STATE_REQUIRED));
    }

    // Seal the signature, then drop the local reference.
    ERROR_CHECK_STATUS(vxFinalizeKernel(kernel));
    ERROR_CHECK_STATUS(vxReleaseKernel(&kernel));
    return VX_SUCCESS;
}
//! \brief The kernel publisher. vx_status publishTensorToImageConvert(vx_context context) { vx_kernel kernel = vxAddUserKernel(context, "com.amd.nn_extension.convert_tensor_to_image", VX_KERNEL_CONVERT_TENSOR_TO_IMAGE_AMD, host_kernel, 5, validateTensorToImageKernel, nullptr, nullptr); ERROR_CHECK_OBJECT(kernel); amd_kernel_query_target_support_f query_target_support_f = query_target_support; amd_kernel_opencl_codegen_callback_f opencl_codegen_callback_f = opencl_codegen; ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f))); ERROR_CHECK_STATUS(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_OPENCL_CODEGEN_CALLBACK, &opencl_codegen_callback_f, sizeof(opencl_codegen_callback_f))); // set kernel parameters. ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED)); ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_IMAGE, VX_PARAMETER_STATE_REQUIRED)); ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); ERROR_CHECK_STATUS(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED)); // finalize and release kernel object. ERROR_CHECK_STATUS(vxFinalizeKernel(kernel)); ERROR_CHECK_STATUS(vxReleaseKernel(&kernel)); return VX_SUCCESS; }
int main(int argc, char *argv[]) { vx_status status = VX_SUCCESS; vx_context context = vxCreateContext(); if (vxGetStatus((vx_reference)context) == VX_SUCCESS) { vx_char implementation[VX_MAX_IMPLEMENTATION_NAME]; vx_char *extensions = NULL; vx_int32 m, modules = 0; vx_uint32 k, kernels = 0; vx_uint32 p, parameters = 0; vx_uint32 a = 0; vx_uint16 vendor, version; vx_size size = 0; vx_kernel_info_t *table = NULL; // take each arg as a module name to load for (m = 1; m < argc; m++) { if (vxLoadKernels(context, argv[m]) != VX_SUCCESS) printf("Failed to load module %s\n", argv[m]); else printf("Loaded module %s\n", argv[m]); } vxPrintAllLog(context); vxRegisterHelperAsLogReader(context); vxQueryContext(context, VX_CONTEXT_VENDOR_ID, &vendor, sizeof(vendor)); vxQueryContext(context, VX_CONTEXT_VERSION, &version, sizeof(version)); vxQueryContext(context, VX_CONTEXT_IMPLEMENTATION, implementation, sizeof(implementation)); vxQueryContext(context, VX_CONTEXT_MODULES, &modules, sizeof(modules)); vxQueryContext(context, VX_CONTEXT_EXTENSIONS_SIZE, &size, sizeof(size)); printf("implementation=%s (%02x:%02x) has %u modules\n", implementation, vendor, version, modules); extensions = malloc(size); if (extensions) { vx_char *line = extensions, *token = NULL; vxQueryContext(context, VX_CONTEXT_EXTENSIONS, extensions, size); do { token = strtok(line, " "); if (token) printf("extension: %s\n", token); line = NULL; } while (token); free(extensions); } status = vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNELS, &kernels, sizeof(kernels)); if (status != VX_SUCCESS) goto exit; printf("There are %u kernels\n", kernels); size = kernels * sizeof(vx_kernel_info_t); table = malloc(size); status = vxQueryContext(context, VX_CONTEXT_UNIQUE_KERNEL_TABLE, table, size); for (k = 0; k < kernels && table != NULL && status == VX_SUCCESS; k++) { vx_kernel kernel = vxGetKernelByEnum(context, table[k].enumeration); if (kernel && vxGetStatus((vx_reference)kernel) == VX_SUCCESS) { status = 
vxQueryKernel(kernel, VX_KERNEL_PARAMETERS, ¶meters, sizeof(parameters)); printf("\t\tkernel[%u]=%s has %u parameters (%d)\n", table[k].enumeration, table[k].name, parameters, status); for (p = 0; p < parameters; p++) { vx_parameter parameter = vxGetKernelParameterByIndex(kernel, p); vx_enum type = VX_TYPE_INVALID, dir = VX_INPUT; vx_uint32 tIdx, dIdx; status = VX_SUCCESS; status |= vxQueryParameter(parameter, VX_PARAMETER_TYPE, &type, sizeof(type)); status |= vxQueryParameter(parameter, VX_PARAMETER_DIRECTION, &dir, sizeof(dir)); for (tIdx = 0; tIdx < dimof(parameter_names); tIdx++) if (parameter_names[tIdx].tenum == type) break; for (dIdx = 0; dIdx < dimof(direction_names); dIdx++) if (direction_names[dIdx].tenum == dir) break; if (status == VX_SUCCESS) printf("\t\t\tparameter[%u] type:%s dir:%s\n", p, parameter_names[tIdx].name, direction_names[dIdx].name); vxReleaseParameter(¶meter); } for (a = 0; a < dimof(attribute_names); a++) { switch (attribute_names[a].type) { case VX_TYPE_SIZE: { vx_size value = 0; if (VX_SUCCESS == vxQueryKernel(kernel, attribute_names[a].tenum, &value, sizeof(value))) printf("\t\t\tattribute[%u] %s = "VX_FMT_SIZE"\n", attribute_names[a].tenum & VX_ATTRIBUTE_ID_MASK, attribute_names[a].name, value); break; } case VX_TYPE_UINT32: { vx_uint32 value = 0; if (VX_SUCCESS == vxQueryKernel(kernel, attribute_names[a].tenum, &value, sizeof(value))) printf("\t\t\tattribute[%u] %s = %u\n", attribute_names[a].tenum & VX_ATTRIBUTE_ID_MASK, attribute_names[a].name, value); break; } default: break; } } vxReleaseKernel(&kernel); } else { printf("ERROR: kernel %s is invalid (%d) !\n", table[k].name, status); } } for (m = 1; m < argc; m++) { if (vxUnloadKernels(context, argv[m]) != VX_SUCCESS) printf("Failed to unload module %s\n", argv[m]); else printf("Unloaded module %s\n", argv[m]); } exit: if (table) free(table); vxReleaseContext(&context); } return 0; }
static void Kernel_free(void *arg) { vxReleaseKernel((vx_kernel *)&arg); }