int CVxParamTensor::Shutdown(void)
{
	if (m_compareCountMatches > 0 && m_compareCountMismatches == 0) {
		printf("OK: tensor COMPARE MATCHED for %d frame(s) of %s\n", m_compareCountMatches, GetVxObjectName());
	}
	if (m_tensor) {
		vxReleaseTensor(&m_tensor);
		m_tensor = nullptr;
	}
	if (m_data) {
		delete[] m_data;
		m_data = nullptr;
	}
	if (m_memory_type == VX_MEMORY_TYPE_HOST) {
		for (vx_size active_handle = 0; active_handle < m_num_handles; active_handle++) {
			if (m_memory_handle[active_handle])
				free(m_memory_handle[active_handle]);
			m_memory_handle[active_handle] = nullptr;
		}
	}
#if ENABLE_OPENCL
	else if (m_memory_type == VX_MEMORY_TYPE_OPENCL) {
		for (vx_size active_handle = 0; active_handle < m_num_handles; active_handle++) {
			if (m_memory_handle[active_handle]) {
				int err = clReleaseMemObject((cl_mem)m_memory_handle[active_handle]);
				if (err)
					ReportError("ERROR: clReleaseMemObject(*) failed (%d)\n", err);
			}
			m_memory_handle[active_handle] = nullptr;
		}
	}
#endif
	return 0;
}
////////
// main() has all the OpenVX application code for this exercise.
// Command-line usage:
//   % solution_exercise3 [<video-sequence>|<camera-device-number>]
// When neither video sequence nor camera device number is specified,
// it defaults to the video sequence in "PETS09-S1-L1-View001.avi".
int main( int argc, char * argv[] )
{
    // Take the video sequence (or camera device number) from the command line
    // when one is given, otherwise pass a null pointer, and instantiate the
    // OpenCV GUI module for reading input RGB images and displaying the image
    // with OpenVX results.
    // NOTE(review): the fallback to the default sequence is presumably done
    // inside CGuiModule when it receives a null pointer - confirm.
    const char * video_sequence = ( argc > 1 ) ? argv[1] : nullptr;
    CGuiModule gui( video_sequence );

    // Try to grab the first video frame from the sequence and check whether
    // a video frame is available.
    if( !gui.Grab() )
    {
        printf( "ERROR: input has no video\n" );
        return 1;
    }

    ////////
    // Set the application configuration parameters. Note that input video
    // sequence is an 8-bit RGB image with dimensions given by gui.GetWidth()
    // and gui.GetHeight(). The parameters for the tensors are:
    //   tensor_dims                    - 3 dimensions of tensor, stored as { <width>, <height>, 3 }
    //   tensor_input_fixed_point_pos   - fixed-point position for input tensor
    //   tensor_output_fixed_point_pos  - fixed-point position for output tensor
    vx_uint32  width                         = gui.GetWidth();
    vx_uint32  height                        = gui.GetHeight();
    vx_size    tensor_dims[3]                = { width, height, 3 }; // 3 channels (RGB)
    vx_uint8   tensor_input_fixed_point_pos  = 5; // input[-128..127] will be mapped to -4..3.96875
    vx_uint8   tensor_output_fixed_point_pos = 7; // output[-1..1] will be mapped to -128 to 128

    ////////
    // Create the OpenVX context and make sure returned context is valid and
    // register the log_callback to receive messages from OpenVX framework.
    vx_context context = vxCreateContext();
    ERROR_CHECK_OBJECT( context );
    vxRegisterLogCallback( context, log_callback, vx_false_e );

    ////////
    // Register user kernels with the context.
    //
    // TODO:********
    //   1. Register user kernel with context by calling your implementation of "registerUserKernel()".
    ERROR_CHECK_STATUS( registerUserKernel( context ) );

    ////////
    // Create OpenVX tensor objects for input and output
    //
    // TODO:********
    //   1. Create tensor objects using tensor_dims, tensor_input_fixed_point_pos, and
    //      tensor_output_fixed_point_pos
    vx_tensor input_tensor   = vxCreateTensor( context, 3, tensor_dims, VX_TYPE_INT16, tensor_input_fixed_point_pos );
    vx_tensor output_tensor  = vxCreateTensor( context, 3, tensor_dims, VX_TYPE_INT16, tensor_output_fixed_point_pos );
    ERROR_CHECK_OBJECT( input_tensor );
    ERROR_CHECK_OBJECT( output_tensor );

    ////////
    // Create, build, and verify the graph with user kernel node.
    //
    // TODO:********
    //   1. Build a graph with just one node created using userTensorCosNode()
    vx_graph graph = vxCreateGraph( context );
    ERROR_CHECK_OBJECT( graph );
    vx_node cos_node = userTensorCosNode( graph, input_tensor, output_tensor );
    ERROR_CHECK_OBJECT( cos_node );
    ERROR_CHECK_STATUS( vxReleaseNode( &cos_node ) );
    ERROR_CHECK_STATUS( vxVerifyGraph( graph ) );

    ////////
    // Process the video sequence frame by frame until the end of sequence or aborted.
    cv::Mat bgrMatForOutputDisplay( height, width, CV_8UC3 );
    for( int frame_index = 0; !gui.AbortRequested(); frame_index++ )
    {
        ////////
        // Copy input RGB frame from OpenCV into input_tensor with UINT8 to Q10.5 (INT16) conversion.
        // In order to do this, use the vxMapTensorPatch API (see "vx_ext_amd.h").
        //
        // TODO:********
        //   1. Use vxMapTensorPatch API for access to input tensor object for writing
        //   2. Copy UINT8 data from OpenCV RGB image to tensor object
        //   3. Use vxUnmapTensorPatch API to return control of buffer back to framework
        vx_uint8 * cv_rgb_image_buffer = gui.GetBuffer();
        vx_size rgb_stride             = gui.GetStride();
        vx_size zeros[3]               = { 0 };
        vx_size tensor_stride[3];
        vx_map_id map_id;
        vx_uint8 * buf;
        ERROR_CHECK_STATUS( vxMapTensorPatch( input_tensor,
                                              3, zeros, tensor_dims,
                                              &map_id, tensor_stride,
                                              (void **)&buf, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );
        for( vx_size c = 0; c < 3; c++ )
        {
            for( vx_size y = 0; y < height; y++ )
            {
                // source pixels are interleaved (3 bytes per pixel); the tensor keeps one plane per channel
                const vx_uint8 * img = cv_rgb_image_buffer + y * rgb_stride + c;
                vx_int16 * inp = (vx_int16 *)(buf + y * tensor_stride[1] + c * tensor_stride[2]);
                for( vx_size x = 0; x < width; x++ )
                {
                    // convert 0..255 to Q10.5 [-4..3.96875 range] fixed-point format
                    inp[x] = (vx_int16)img[x * 3] - 128;
                }
            }
        }
        ERROR_CHECK_STATUS( vxUnmapTensorPatch( input_tensor, map_id ) );


        ////////
        // Now that input tensor is ready, just run the graph.
        //
        // TODO:********
        //   1. Call vxProcessGraph to execute the tensor_cos kernel in graph
        ERROR_CHECK_STATUS( vxProcessGraph( graph ) );

        ////////
        // Display the output tensor object as RGB image
        //
        // TODO:********
        //   1. Use vxMapTensorPatch API for access to output tensor object for reading
        //   2. Copy tensor object data into OpenCV RGB image
        //   3. Use vxUnmapTensorPatch API to return control of buffer back to framework
        //
        // The output tensor is only read here, so it must be mapped with
        // VX_READ_ONLY: a write-only mapping does not oblige the framework to
        // make the computed data visible in the mapped buffer.
        ERROR_CHECK_STATUS( vxMapTensorPatch( output_tensor,
                                              3, zeros, tensor_dims,
                                              &map_id, tensor_stride,
                                              (void **)&buf, VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );
        vx_uint8 * cv_bgr_image_buffer = bgrMatForOutputDisplay.data;
        vx_size bgr_stride             = bgrMatForOutputDisplay.step;
        for( vx_size c = 0; c < 3; c++ )
        {
            for( vx_size y = 0; y < height; y++ )
            {
                const vx_int16 * out = (const vx_int16 *)(buf + y * tensor_stride[1] + c * tensor_stride[2]);
                vx_uint8 * img = cv_bgr_image_buffer + y * bgr_stride + (2 - c); // (2 - c) for RGB to BGR conversion
                for( vx_size x = 0; x < width; x++ )
                {
                    // scale convert Q8.7 [-1..1 range] fixed-point format to 0..255 with saturation;
                    // computed in int so the +128 offset cannot wrap, and clamped at both ends
                    int value = out[x] + 128;
                    value = value > 255 ? 255 : ( value < 0 ? 0 : value );
                    img[x * 3] = (vx_uint8)value;
                }
            }
        }
#if ENABLE_DISPLAY
        cv::imshow( "Cosine", bgrMatForOutputDisplay );
#endif
        ERROR_CHECK_STATUS( vxUnmapTensorPatch( output_tensor, map_id ) );

        ////////
        // Display the results and grab the next input RGB frame for the next iteration.
        char text[128];
        // bounded formatting: output is truncated rather than overrunning text[]
        snprintf( text, sizeof( text ), "Keyboard ESC/Q-Quit SPACE-Pause [FRAME %d] [fixed_point_pos input:%d output:%d]", frame_index, tensor_input_fixed_point_pos, tensor_output_fixed_point_pos );
        gui.DrawText( 0, 16, text );
        gui.Show();
        if( !gui.Grab() )
        {
            // Terminate the processing loop if the end of sequence is detected.
            gui.WaitForKey();
            break;
        }
    }

    ////////
    // To release an OpenVX object, you need to call vxRelease<Object> API which takes a pointer to the object.
    // If the release operation is successful, the OpenVX framework will reset the object to NULL.
    //
    // TODO:****
    //   1. Release graph and tensor objects
    ERROR_CHECK_STATUS( vxReleaseGraph( &graph ) );
    ERROR_CHECK_STATUS( vxReleaseTensor( &input_tensor ) );
    ERROR_CHECK_STATUS( vxReleaseTensor( &output_tensor ) );
    ERROR_CHECK_STATUS( vxReleaseContext( &context ) );

    return 0;
}
int CVxParamTensor::Initialize(vx_context context, vx_graph graph, const char * desc)
{
	// Parse the object description, create the matching tensor object, and
	// pass whatever ScanParameters() returns on to InitializeIO().
	const char * ioParams = desc;
	const bool virtualRequested = (_strnicmp(desc, "virtual-tensor:", 15) == 0);
	if (virtualRequested || _strnicmp(desc, "tensor:", 7) == 0) {
		// regular or virtual tensor: both share the "tensor:" parameter syntax
		if (virtualRequested) {
			desc += 8; // step over "virtual-" so the text starts at "tensor:"
		}
		char prefix[64], typeName[64];
		ioParams = ScanParameters(desc, "tensor:<num-of-dims>,{dims},<data-type>,<fixed-point-pos>", "s:D,L,s,d", prefix, &m_num_of_dims, &m_num_of_dims, m_dims, typeName, &m_fixed_point_pos);
		m_data_type = ovxName2Enum(typeName);
		m_tensor = virtualRequested
			? vxCreateVirtualTensor(graph, m_num_of_dims, m_dims, m_data_type, m_fixed_point_pos)
			: vxCreateTensor(context, m_num_of_dims, m_dims, m_data_type, m_fixed_point_pos);
	}
	else if (_strnicmp(desc, "tensor-from-roi:", 16) == 0) {
		// view into an already registered tensor, looked up by name in m_paramMap
		char prefix[64], parentName[64];
		ioParams = ScanParameters(desc, "tensor-from-view:<tensor>,<view>", "s:s,D,L,L", prefix, parentName, &m_num_of_dims, &m_num_of_dims, m_start, &m_num_of_dims, m_end);
		auto parentIt = m_paramMap->find(parentName);
		// NOTE(review): parentIt is dereferenced right after this check, so
		// ReportError is presumably expected not to return - confirm.
		if (parentIt == m_paramMap->end())
			ReportError("ERROR: tensor [%s] doesn't exist for %s\n", parentName, desc);
		m_tensor = vxCreateTensorFromView((vx_tensor)parentIt->second->GetVxObject(), m_num_of_dims, m_start, m_end);
	}
	else if (_strnicmp(desc, "tensor-from-handle:", 19) == 0) {
		// tensor backed by buffers that this object allocates itself
		char prefix[64], typeName[64], memTypeName[64];
		ioParams = ScanParameters(desc, "tensor-from-handle:<num-of-dims>,{dims},<data-type>,<fixed-point-pos>,{strides},<num-handles>,<memory-type>",
			"s:D,L,s,d,L,D,s", prefix, &m_num_of_dims, &m_num_of_dims, m_dims, typeName, &m_fixed_point_pos, &m_num_of_dims, m_stride, &m_num_handles, memTypeName);
		if (m_num_handles > MAX_BUFFER_HANDLES)
			ReportError("ERROR: num-handles is out of range: " VX_FMT_SIZE " (must be less than %d)\n", m_num_handles, MAX_BUFFER_HANDLES);
		m_data_type = ovxName2Enum(typeName);
		vx_uint64 memTypeValue = 0;
		if (GetScalarValueFromString(VX_TYPE_ENUM, memTypeName, &memTypeValue) < 0)
			ReportError("ERROR: invalid memory type enum: %s\n", memTypeName);
		m_memory_type = (vx_enum)memTypeValue;
		// start with every handle slot empty
		memset(m_memory_handle, 0, sizeof(m_memory_handle));
		if (m_memory_type == VX_MEMORY_TYPE_HOST) {
			// one host allocation per handle
			for (vx_size h = 0; h < m_num_handles; ++h) {
				// buffer size = extent of the last dimension times its stride
				const vx_size bytes = m_dims[m_num_of_dims-1] * m_stride[m_num_of_dims-1];
				m_memory_handle[h] = malloc(bytes);
				if (!m_memory_handle[h])
					ReportError("ERROR: malloc(%d) failed\n", (int)bytes);
			}
		}
#if ENABLE_OPENCL
		else if (m_memory_type == VX_MEMORY_TYPE_OPENCL) {
			// one OpenCL buffer per handle, created in the context queried from OpenVX
			cl_context clCtx = nullptr;
			vx_status queryStatus = vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_OPENCL_CONTEXT, &clCtx, sizeof(clCtx));
			if (queryStatus)
				ReportError("ERROR: vxQueryContext(*,VX_CONTEXT_ATTRIBUTE_AMD_OPENCL_CONTEXT,...) failed (%d)\n", queryStatus);
			for (vx_size h = 0; h < m_num_handles; ++h) {
				const vx_size bytes = m_dims[m_num_of_dims-1] * m_stride[m_num_of_dims-1];
				cl_int clErr = CL_SUCCESS;
				m_memory_handle[h] = clCreateBuffer(clCtx, CL_MEM_READ_WRITE, bytes, NULL, &clErr);
				if (!m_memory_handle[h] || clErr)
					ReportError("ERROR: clCreateBuffer(*,CL_MEM_READ_WRITE,%d,NULL,*) failed (%d)\n", (int)bytes, clErr);
			}
		}
#endif
		else ReportError("ERROR: invalid memory-type enum: %s\n", memTypeName);
		// the tensor is created on the first handle
		m_active_handle = 0;
		m_tensor = vxCreateTensorFromHandle(context, m_num_of_dims, m_dims, m_data_type, m_fixed_point_pos, m_stride, m_memory_handle[m_active_handle], m_memory_type);
	}
	else ReportError("ERROR: unsupported tensor type: %s\n", desc);

	// check the created object; on success record the reference and set up I/O
	const vx_status createStatus = vxGetStatus((vx_reference)m_tensor);
	if (createStatus == VX_SUCCESS) {
		m_vxObjRef = (vx_reference)m_tensor;
		return InitializeIO(context, graph, m_vxObjRef, ioParams);
	}

	// creation failed: report, release whatever was created, and throw
	printf("ERROR: tensor creation failed => %d (%s)\n", createStatus, ovxEnum2Name(createStatus));
	if (m_tensor) vxReleaseTensor(&m_tensor);
	throw -1;
}