static VALUE Reference_count(VALUE self) { vx_uint32 t; vx_reference ref = (vx_reference)DATA_PTR(self); vxQueryReference(ref, VX_QUERY_REF_COUNT, &t, sizeof(t)); return INT2FIX(t); }
static VALUE Reference_type(VALUE self) { vx_enum t; vx_reference ref = (vx_reference)DATA_PTR(self); vxQueryReference(ref, VX_QUERY_REF_TYPE, &t, sizeof(t)); return INT2FIX(t); }
static vx_status VX_CALLBACK validateKernel(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { // check input configuration vx_enum type, format; vx_size num_dims, input_dims[4] = { 1, 1, 1, 1 }; ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &type, sizeof(type))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_dims, sizeof(num_dims))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, input_dims, sizeof(input_dims[0])*num_dims)); if ((num_dims != 4 && num_dims != 3) || ((input_dims[0] & 3) != 0)) return VX_ERROR_INVALID_DIMENSION; if (type != VX_TYPE_FLOAT32) return VX_ERROR_INVALID_TYPE; // check output object type and set configuration ERROR_CHECK_STATUS(vxQueryReference(parameters[1], VX_REFERENCE_TYPE, &type, sizeof(type))); if (type == VX_TYPE_IMAGE) { vx_df_image format; ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_FORMAT, &format, sizeof(format))); if(format == VX_DF_IMAGE_U8 && input_dims[2] > 255) return VX_ERROR_INVALID_FORMAT; if(format == VX_DF_IMAGE_VIRT) format = (input_dims[2] < 256) ? VX_DF_IMAGE_U8 : VX_DF_IMAGE_U16; vx_uint32 width = (vx_uint32)input_dims[0]; vx_uint32 height = (vx_uint32)(input_dims[1]*input_dims[3]); ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_WIDTH, &width, sizeof(width))); ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_HEIGHT, &height, sizeof(height))); ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_IMAGE_FORMAT, &format, sizeof(format))); } else if (type == VX_TYPE_TENSOR) { vx_size output_num_dims, output_dims[4] = { 1, 1, 1, 1 }; ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &type, sizeof(type))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &output_num_dims, sizeof(output_num_dims))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, output_dims, sizeof(output_dims[0])*output_num_dims)); if (output_dims[2] != 1 && output_dims[2] != 2) // top_k must be 1 or 2 return VX_ERROR_INVALID_DIMENSION; if(type == VX_TYPE_UINT8 && input_dims[2] > 255) return VX_ERROR_INVALID_FORMAT; if(type != VX_TYPE_UINT8 && type != VX_TYPE_UINT16 && type != VX_TYPE_INT16) return VX_ERROR_INVALID_FORMAT; ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &type, sizeof(type))); ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &output_num_dims, sizeof(output_num_dims))); ERROR_CHECK_STATUS(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, output_dims, sizeof(output_dims[0])*output_num_dims)); } else return VX_ERROR_INVALID_PARAMETERS; return VX_SUCCESS; }
VX_API_ENTRY vx_status VX_API_CALL vxSetParameterByIndex(vx_node node, vx_uint32 index, vx_reference value) { vx_status status = VX_SUCCESS; vx_enum type = 0; vx_enum data_type = 0; if (vxIsValidSpecificReference(&node->base, VX_TYPE_NODE) == vx_false_e) { VX_PRINT(VX_ZONE_ERROR, "Supplied node was not actually a node\n"); status = VX_ERROR_INVALID_REFERENCE; goto exit; } VX_PRINT(VX_ZONE_PARAMETER, "Attempting to set parameter[%u] on %s (enum:%d) to "VX_FMT_REF"\n", index, node->kernel->name, node->kernel->enumeration, value); /* is the index out of bounds? */ if ((index >= node->kernel->signature.num_parameters) || (index >= VX_INT_MAX_PARAMS)) { VX_PRINT(VX_ZONE_ERROR, "Invalid index %u\n", index); status = VX_ERROR_INVALID_VALUE; goto exit; } /* if it's an optional parameter, it's ok to be NULL */ if ((value == 0) && (node->kernel->signature.states[index] == VX_PARAMETER_STATE_OPTIONAL)) { status = VX_SUCCESS; goto exit; } /* if it's required, it's got to exist */ if (vxIsValidReference((vx_reference_t *)value) == vx_false_e) { VX_PRINT(VX_ZONE_ERROR, "Supplied value was not actually a reference\n"); status = VX_ERROR_INVALID_REFERENCE; goto exit; } /* if it was a valid reference then get the type from it */ vxQueryReference(value, VX_REF_ATTRIBUTE_TYPE, &type, sizeof(type)); VX_PRINT(VX_ZONE_PARAMETER, "Query returned type %08x for ref "VX_FMT_REF"\n", type, value); /* Check that signature type matches reference type*/ if (node->kernel->signature.types[index] != type) { /* Check special case where signature is a specific scalar type. This can happen if the vxAddParameterToKernel() passes one of the scalar vx_type_e types instead of the more generic VX_TYPE_SCALAR since the spec doesn't specify that only VX_TYPE_SCALAR should be used for scalar types in this function. */ if((type == VX_TYPE_SCALAR) && (vxQueryScalar((vx_scalar)value, VX_SCALAR_ATTRIBUTE_TYPE, &data_type, sizeof(data_type)) == VX_SUCCESS)) { if(data_type != node->kernel->signature.types[index]) { VX_PRINT(VX_ZONE_ERROR, "Invalid scalar type 0x%08x!\n", data_type); status = VX_ERROR_INVALID_TYPE; goto exit; } } else { VX_PRINT(VX_ZONE_ERROR, "Invalid type 0x%08x!\n", type); status = VX_ERROR_INVALID_TYPE; goto exit; } } if (node->parameters[index]) { if (node->parameters[index]->delay!=NULL) { // we already have a delay element here */ vx_bool res = vxRemoveAssociationToDelay(node->parameters[index], node, index); if (res == vx_false_e) { VX_PRINT(VX_ZONE_ERROR, "Internal error removing delay association\n"); status = VX_ERROR_INVALID_REFERENCE; goto exit; } } } if (value->delay!=NULL) { /* the new parameter is a delay element */ vx_bool res = vxAddAssociationToDelay(value, node, index); if (res == vx_false_e) { VX_PRINT(VX_ZONE_ERROR, "Internal error adding delay association\n"); status = VX_ERROR_INVALID_REFERENCE; goto exit; } } /* actual change of the node parameter */ vxNodeSetParameter(node, index, value); /* if the node has a child graph, find out which parameter is this */ if (node->child) { vx_uint32 p = 0; for (p = 0; p < node->child->numParams; p++) { if ((node->child->parameters[p].node == node) && (node->child->parameters[p].index == index)) { status = vxSetGraphParameterByIndex((vx_graph)node->child, p, value); break; } } } exit: if (status == VX_SUCCESS) { VX_PRINT(VX_ZONE_PARAMETER, "Assigned Node[%u] %p type:%08x ref="VX_FMT_REF"\n", index, node, type, value); } else { VX_PRINT(VX_ZONE_ERROR, "Specified: parameter[%u] type:%08x => "VX_FMT_REF"\n", index, type, value); VX_PRINT(VX_ZONE_ERROR, "Required: parameter[%u] dir:%d type:%08x\n", index, node->kernel->signature.directions[index], node->kernel->signature.types[index]); } return status; }
static VALUE Node_init(int argc, VALUE *args, VALUE self) { vx_graph graph = 0; vx_kernel kernel = 0; VALUE w,h,f; Check_Type(self, T_DATA); if (argc <= 1) rb_raise(rb_eArgError, "Not enough arguments"); graph = (vx_graph)DATA_PTR(args[0]); if (argc == 2) // Kernel { Check_Type(args[1], T_DATA); kernel = (vx_kernel)DATA_PTR(args[1]); DATA_PTR(self) = (void *)vxCreateNode(graph, kernel); } else if (argc == 3) // graph, [string|enum], array of hashes { vx_node node = 0; vx_uint32 p = 0; VALUE kern = args[1]; VALUE array = args[2]; long param = 0; if (TYPE(kern) == T_STRING) kernel = vxGetKernelByName(context, RSTRING(kern)->ptr); else if (TYPE(kern) == T_FIXNUM) kernel = vxGetKernelByEnum(context, FIX2INT(kern)); else if (TYPE(kern) == T_DATA) // a OpenVX::Kernel kernel = (vx_kernel)DATA_PTR(kern); else rb_raise(rb_eTypeError, "kernel must be a string, fixnum, or OpenVX::Kernel"); if (kernel == 0) rb_raise(rb_eNameError, "kernel could not be found in OpenVX"); Check_Type(array, T_ARRAY); node = vxCreateNode(graph, kernel); if (node == 0) rb_raise(rb_eTypeError, "node could not be created!"); REXT_PRINT("Array of parameters has len = %ld\n", RARRAY(array)->len); for (param = 0; param < RARRAY(array)->len ; param++) { VALUE dir,ref,hash; vx_reference ref2 = 0; vx_status status = 0; vx_enum type = VX_TYPE_INVALID; const char *name; hash = rb_ary_entry(array, param); Check_Type(hash, T_HASH); dir = rb_hash_aref(hash, ID2SYM(rb_intern("dir"))); ref = rb_hash_aref(hash, ID2SYM(rb_intern("ref"))); name = rb_obj_classname(ref); REXT_PRINT("rb_type(dir)=0x%x\n", TYPE(dir)); REXT_PRINT("ref class = %s\n", name); Check_Type(dir, T_FIXNUM); Check_Type(ref, T_DATA); REXT_PRINT("dir=%ld\n", FIX2UINT(dir)); ref2 = (vx_reference)DATA_PTR(ref); if (strcmp("OpenVX::Image", name) == 0) type = VX_TYPE_IMAGE; else if (strcmp("OpenVX::Buffer", name) == 0) type = VX_TYPE_BUFFER; else if (strcmp("OpenVX::Scalar", name) == 0) type = VX_TYPE_MAX; REXT_PRINT("vx type = %d (0x%08x)\n", type, type); if (type == VX_TYPE_IMAGE) // replace with format status = vxQueryImage(ref2, VX_QUERY_IMAGE_FORMAT, &type, sizeof(vx_fourcc)); else if (type == VX_TYPE_MAX) status = vxQueryReference(ref2, VX_QUERY_REF_TYPE, &type, sizeof(type)); REXT_PRINT("status = %d vx type = %d (0x%08x)\n", status, type, type); status = vxSetParameterByIndex(node, param, FIX2UINT(dir), type, ref2); REXT_PRINT("status = %d\n", status); } DATA_PTR(self) = (void *)node; } else { rb_raise(rb_eArgError, "incorrect number of arguments"); } return Qnil; }
CVxParameter * CreateDataObject(vx_context context, vx_graph graph, vx_reference ref, const char * params, vx_uint32 captureFrameStart) { // create the object based on the ref vx_enum type; vx_status status = vxQueryReference(ref, VX_REF_ATTRIBUTE_TYPE, &type, sizeof(type)); if (status) { printf("ERROR: CreateDataObject: vxQueryReference(*,VX_REF_ATTRIBUTE_TYPE,...) failed(%d)\n", status); throw -1; } if (type == VX_TYPE_IMAGE) { CVxParamImage *this_image = new CVxParamImage(); this_image->SetCaptureFrameStart(captureFrameStart); if (this_image->InitializeIO(context, graph, ref, params)) return NULL; return this_image; } else if (type == VX_TYPE_ARRAY) { CVxParamArray *this_array = new CVxParamArray(); this_array->SetCaptureFrameStart(captureFrameStart); if (this_array->InitializeIO(context, graph, ref, params)) return NULL; return this_array; } else if (type == VX_TYPE_PYRAMID) { CVxParamPyramid *this_pyramid = new CVxParamPyramid(); this_pyramid->SetCaptureFrameStart(captureFrameStart); if (this_pyramid->InitializeIO(context, graph, ref, params)) return NULL; return this_pyramid; } else if (type == VX_TYPE_DISTRIBUTION) { CVxParamDistribution *this_distribution = new CVxParamDistribution(); this_distribution->SetCaptureFrameStart(captureFrameStart); if (this_distribution->InitializeIO(context, graph, ref, params)) return NULL; return this_distribution; } else if (type == VX_TYPE_CONVOLUTION) { CVxParamConvolution *this_convolution = new CVxParamConvolution(); this_convolution->SetCaptureFrameStart(captureFrameStart); if (this_convolution->InitializeIO(context, graph, ref, params)) return NULL; return this_convolution; } else if (type == VX_TYPE_LUT) { CVxParamLUT *this_LUT = new CVxParamLUT(); this_LUT->SetCaptureFrameStart(captureFrameStart); if (this_LUT->InitializeIO(context, graph, ref, params)) return NULL; return this_LUT; } else if (type == VX_TYPE_MATRIX) { CVxParamMatrix *this_matrix = new CVxParamMatrix(); this_matrix->SetCaptureFrameStart(captureFrameStart); if (this_matrix->InitializeIO(context, graph, ref, params)) return NULL; return this_matrix; } else if (type == VX_TYPE_REMAP) { CVxParamRemap *this_remap = new CVxParamRemap(); this_remap->SetCaptureFrameStart(captureFrameStart); if (this_remap->InitializeIO(context, graph, ref, params)) return NULL; return this_remap; } else if (type == VX_TYPE_SCALAR) { CVxParamScalar *this_scalar = new CVxParamScalar(); this_scalar->SetCaptureFrameStart(captureFrameStart); if (this_scalar->InitializeIO(context, graph, ref, params)) return NULL; return this_scalar; } else if (type == VX_TYPE_THRESHOLD) { CVxParamThreshold *this_threshold = new CVxParamThreshold(); this_threshold->SetCaptureFrameStart(captureFrameStart); if (this_threshold->InitializeIO(context, graph, ref, params)) return NULL; return this_threshold; } else return nullptr; }
//! \brief The OpenCL code generator callback. static vx_status VX_CALLBACK opencl_codegen( vx_node node, // [input] node const vx_reference parameters[], // [input] parameters vx_uint32 num, // [input] number of parameters bool opencl_load_function, // [input] false: normal OpenCL kernel; true: reserved char opencl_kernel_function_name[64], // [output] kernel_name for clCreateKernel() std::string& opencl_kernel_code, // [output] string for clCreateProgramWithSource() std::string& opencl_build_options, // [output] options for clBuildProgram() vx_uint32& opencl_work_dim, // [output] work_dim for clEnqueueNDRangeKernel() vx_size opencl_global_work[], // [output] global_work[] for clEnqueueNDRangeKernel() vx_size opencl_local_work[], // [output] local_work[] for clEnqueueNDRangeKernel() vx_uint32& opencl_local_buffer_usage_mask, // [output] reserved: must be ZERO vx_uint32& opencl_local_buffer_size_in_bytes // [output] reserved: must be ZERO ) { // get configuration vx_size num_dims, input_dims[4] = { 1, 1, 1, 1 }, top_k = 1; vx_enum output_obj_type, output_data_type = VX_TYPE_UINT16; ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_dims, sizeof(num_dims))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, input_dims, sizeof(input_dims[0])*num_dims)); ERROR_CHECK_STATUS(vxQueryReference(parameters[1], VX_REFERENCE_TYPE, &output_obj_type, sizeof(output_obj_type))); if(output_obj_type == VX_TYPE_IMAGE) { vx_df_image format; ERROR_CHECK_STATUS(vxQueryImage((vx_image)parameters[1], VX_IMAGE_FORMAT, &format, sizeof(format))); if(format == VX_DF_IMAGE_U8) output_data_type = VX_TYPE_UINT8; else if(format == VX_DF_IMAGE_U16) output_data_type = VX_TYPE_UINT16; } else { vx_size num_dims_output, output_dims[4] = { 1, 1, 1, 1 }; ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_dims_output, sizeof(num_dims_output))); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, output_dims, sizeof(output_dims[0])*num_dims_output)); ERROR_CHECK_STATUS(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_data_type, sizeof(output_data_type))); top_k = output_dims[2]; } size_t N = input_dims[3]; // compute global work opencl_work_dim = 3; opencl_local_work[0] = 8; opencl_local_work[1] = 8; opencl_local_work[2] = 1; opencl_global_work[0] = ((input_dims[0] + 3) / 4 + opencl_local_work[0] - 1) & ~(opencl_local_work[0] - 1); opencl_global_work[1] = (input_dims[1] + opencl_local_work[1] - 1) & ~(opencl_local_work[1] - 1); opencl_global_work[2] = N; // generate OpenCL C code strcpy(opencl_kernel_function_name, "argmax"); char item[8192]; sprintf(item, "#pragma OPENCL EXTENSION cl_amd_media_ops : enable\n" "__kernel __attribute__((reqd_work_group_size(%ld, %ld, 1)))\n" // opencl_local_work[0] opencl_local_work[1] "void %s(__global uchar * i0_buf, uint i0_offset, uint4 i0_stride, %s)\n" "{\n" " uint x = get_global_id(0) * 4;\n" " uint y = get_global_id(1);\n" " uint z = get_global_id(2);\n" " if(x < %ld && y < %ld) {\n" " i0_buf += i0_offset + z * i0_stride.s3 + y * i0_stride.s1 + x * i0_stride.s0;\n" " uint4 cmax;\n" , opencl_local_work[0], opencl_local_work[1], opencl_kernel_function_name , (output_obj_type == VX_TYPE_IMAGE) ? "uint o0_width, uint o0_height, __global uchar * o0_buf, uint o0_stride, uint o0_offset" : "__global uchar * o0_buf, uint o0_offset, uint4 o0_stride" , input_dims[0], input_dims[1]); opencl_kernel_code = item; if(top_k == 2) { sprintf(item, " uint4 cmax1;\n" " float4 f, fmax, fmax1;\n" " fmax = *(__global float4 *)i0_buf;\n" " i0_buf += i0_stride.s2; f = *(__global float4 *)i0_buf;\n" " cmax1.s0 = (f.s0 > fmax.s0) ? 0 : 1;\n" " cmax.s0 = (f.s0 > fmax.s0) ? 1 : 0;\n" " fmax1.s0 = (f.s0 > fmax.s0) ? fmax.s0 : f.s0;\n" " fmax.s0 = (f.s0 > fmax.s0) ? f.s0 : fmax.s0;\n" " cmax1.s1 = (f.s1 > fmax.s1) ? 0 : 1;\n" " cmax.s1 = (f.s1 > fmax.s1) ? 1 : 0;\n" " fmax1.s1 = (f.s1 > fmax.s1) ? fmax.s1 : f.s1;\n" " fmax.s1 = (f.s1 > fmax.s1) ? f.s1 : fmax.s1;\n" " cmax1.s2 = (f.s2 > fmax.s2) ? 0 : 1;\n" " cmax.s2 = (f.s2 > fmax.s2) ? 1 : 0;\n" " fmax1.s2 = (f.s2 > fmax.s2) ? fmax.s2 : f.s2;\n" " fmax.s2 = (f.s2 > fmax.s2) ? f.s2 : fmax.s2;\n" " cmax1.s3 = (f.s3 > fmax.s3) ? 0 : 1;\n" " cmax.s3 = (f.s3 > fmax.s3) ? 1 : 0;\n" " fmax1.s3 = (f.s3 > fmax.s3) ? fmax.s3 : f.s3;\n" " fmax.s3 = (f.s3 > fmax.s3) ? f.s3 : fmax.s3;\n" " for(uint c = 2; c < %ld; c++) {\n" " i0_buf += i0_stride.s2; f = *(__global float4 *)i0_buf;\n" " cmax1.s0 = (f.s0 > fmax.s0) ? cmax.s0 : ((f.s0 > fmax1.s0) ? c : cmax1.s0);\n" " fmax1.s0 = (f.s0 > fmax.s0) ? fmax.s0 : ((f.s0 > fmax1.s0) ? f.s0 : fmax1.s0);\n" " cmax.s0 = (f.s0 > fmax.s0) ? c : cmax.s0;\n" " fmax.s0 = (f.s0 > fmax.s0) ? f.s0 : fmax.s0;\n" " cmax1.s1 = (f.s1 > fmax.s1) ? cmax.s1 : ((f.s1 > fmax1.s1) ? c : cmax1.s1);\n" " fmax1.s1 = (f.s1 > fmax.s1) ? fmax.s1 : ((f.s1 > fmax1.s1) ? f.s1 : fmax1.s1);\n" " cmax.s1 = (f.s1 > fmax.s1) ? c : cmax.s1;\n" " fmax.s1 = (f.s1 > fmax.s1) ? f.s1 : fmax.s1;\n" " cmax1.s2 = (f.s2 > fmax.s2) ? cmax.s2 : ((f.s2 > fmax1.s2) ? c : cmax1.s2);\n" " fmax1.s2 = (f.s2 > fmax.s2) ? fmax.s2 : ((f.s2 > fmax1.s2) ? f.s2 : fmax1.s2);\n" " cmax.s2 = (f.s2 > fmax.s2) ? c : cmax.s2;\n" " fmax.s2 = (f.s2 > fmax.s2) ? f.s2 : fmax.s2;\n" " cmax1.s3 = (f.s3 > fmax.s3) ? cmax.s3 : ((f.s3 > fmax1.s3) ? c : cmax1.s3);\n" " fmax1.s3 = (f.s3 > fmax.s3) ? fmax.s3 : ((f.s3 > fmax1.s3) ? f.s3 : fmax1.s3);\n" " cmax.s3 = (f.s3 > fmax.s3) ? c : cmax.s3;\n" " fmax.s3 = (f.s3 > fmax.s3) ? f.s3 : fmax.s3;\n" " }\n" , input_dims[2]); opencl_kernel_code += item; } else if (top_k == 1) { sprintf(item, " cmax = (uint4)0;\n" " float4 fmax = *(__global float4 *)i0_buf;\n" " for(uint c = 1; c < %ld; c++) {\n" " i0_buf += i0_stride.s2;\n" " float4 f = *(__global float4 *)i0_buf;\n" " cmax.s0 = (f.s0 > fmax.s0) ? c : cmax.s0;\n" " fmax.s0 = (f.s0 > fmax.s0) ? f.s0 : fmax.s0;\n" " cmax.s1 = (f.s1 > fmax.s1) ? c : cmax.s1;\n" " fmax.s1 = (f.s1 > fmax.s1) ? f.s1 : fmax.s1;\n" " cmax.s2 = (f.s2 > fmax.s2) ? c : cmax.s2;\n" " fmax.s2 = (f.s2 > fmax.s2) ? f.s2 : fmax.s2;\n" " cmax.s3 = (f.s3 > fmax.s3) ? c : cmax.s3;\n" " fmax.s3 = (f.s3 > fmax.s3) ? f.s3 : fmax.s3;\n" " }\n" , input_dims[2]); opencl_kernel_code += item; } if(output_data_type == VX_TYPE_UINT8) { if(output_obj_type == VX_TYPE_IMAGE) { sprintf(item, " o0_buf += o0_offset + (z * %ld + y) * o0_stride + x;\n" , input_dims[1]); opencl_kernel_code += item; } else { opencl_kernel_code += " o0_buf += o0_offset + z * o0_stride.s3 + y * o0_stride.s1 + x * o0_stride.s0;\n"; } opencl_kernel_code += " uint imax = cmax.s0 + (cmax.s1 << 8) + (cmax.s2 << 16) + (cmax.s3 << 24);\n" " *(__global uint *)o0_buf = imax;\n"; if(top_k == 2) { opencl_kernel_code += " uint imax1 = cmax1.s0 + (cmax1.s1 << 8) + (cmax1.s2 << 16) + (cmax1.s3 << 24);\n" " *(__global uint *)&o0_buf[o0_stride.s2] = imax1;\n"; } } else if(output_data_type == VX_TYPE_UINT16) { if(output_obj_type == VX_TYPE_IMAGE) { sprintf(item, " o0_buf += o0_offset + (z * %ld + y) * o0_stride + x * 2;\n" , input_dims[1]); opencl_kernel_code += item; } else { opencl_kernel_code += " o0_buf += o0_offset + z * o0_stride.s3 + y * o0_stride.s1 + x * o0_stride.s0;\n"; } opencl_kernel_code += " uint2 imax;\n" " imax.s0 = cmax.s0 + (cmax.s1 << 16);\n" " imax.s1 = cmax.s2 + (cmax.s3 << 16);\n" " *(__global uint2 *)o0_buf = imax;\n"; if(top_k == 2) { opencl_kernel_code += " uint2 imax1;\n" " imax1.s0 = cmax1.s0 + (cmax1.s1 << 16);\n" " imax1.s1 = cmax1.s2 + (cmax1.s3 << 16);\n" " *(__global uint2 *)&o0_buf[o0_stride.s2] = imax1;\n"; } } opencl_kernel_code += " }\n" "}\n"; #if ENABLE_DEBUG_PRINT_DIMS std::cout << "KERNEL argmax_layer output " << input_dims[0] << "x" << input_dims[1] << " " << std::endl; #endif return VX_SUCCESS; }