////////
// User kernel host side function gets called to execute the user kernel node.
// Perform element-wise cosine function on input tensor to produce output tensor.
//
// TODO:********
// 1. Get fixed-point position and dimensions of input and output tensors.
//    Note that both input and output tensors have same dimensions.
// 2. Access input and output tensor object data using vxMapTensorPatch API.
// 3. Perform element-wise cosine function using fixed-point position.
// 4. Use vxUnmapTensorPatch API to give the data buffers control back to OpenVX framework.
vx_status VX_CALLBACK tensor_cos_host_side_function( vx_node node, const vx_reference * refs, vx_uint32 num )
{
    // Get fixed-point position and dimensions of input and output tensors.
    // Note that both input and output tensors have same dimensions.
    vx_tensor input  = ( vx_tensor ) refs[0];
    vx_tensor output = ( vx_tensor ) refs[1];
    vx_size num_of_dims;
    vx_size dims[4] = { 1, 1, 1, 1 };
    vx_uint8 input_fixed_point_pos;
    vx_uint8 output_fixed_point_pos;
    ERROR_CHECK_STATUS( vxQueryTensor( input, VX_TENSOR_NUMBER_OF_DIMS, &num_of_dims, sizeof( num_of_dims ) ) );
    ERROR_CHECK_STATUS( vxQueryTensor( input, VX_TENSOR_DIMS, &dims, num_of_dims * sizeof( vx_size ) ) );
    ERROR_CHECK_STATUS( vxQueryTensor( input, VX_TENSOR_FIXED_POINT_POSITION, &input_fixed_point_pos, sizeof( input_fixed_point_pos ) ) );
    ERROR_CHECK_STATUS( vxQueryTensor( output, VX_TENSOR_FIXED_POINT_POSITION, &output_fixed_point_pos, sizeof( output_fixed_point_pos ) ) );

    // Access input and output tensor object data using vxMapTensorPatch API.
    // The input is only read and the output is only written, so map them
    // with VX_READ_ONLY and VX_WRITE_ONLY usage, respectively.
    vx_size zeros[4] = { 0 };
    vx_map_id map_input, map_output;
    vx_uint8 * buf_input, * buf_output;
    vx_size stride_input[4]  = { 0 };
    vx_size stride_output[4] = { 0 };
    ERROR_CHECK_STATUS( vxMapTensorPatch( input, num_of_dims, zeros, dims,
                                          &map_input, stride_input, (void **)&buf_input,
                                          VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );
    ERROR_CHECK_STATUS( vxMapTensorPatch( output, num_of_dims, zeros, dims,
                                          &map_output, stride_output, (void **)&buf_output,
                                          VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );

    // Perform element-wise cosine function using fixed-point position.
    vx_float32 input_to_float_multiplier  = 1.0f / (vx_float32)( 1 << input_fixed_point_pos );
    vx_float32 output_to_int16_multiplier = (vx_float32)( 1 << output_fixed_point_pos );
    for( vx_size dim3 = 0; dim3 < dims[3]; dim3++ )
    {
        for( vx_size dim2 = 0; dim2 < dims[2]; dim2++ )
        {
            for( vx_size dim1 = 0; dim1 < dims[1]; dim1++ )
            {
                const vx_int16 * ibuf = (const vx_int16 *)( buf_input  + dim3 * stride_input[3]  + dim2 * stride_input[2]  + dim1 * stride_input[1] );
                vx_int16       * obuf = (vx_int16 *)      ( buf_output + dim3 * stride_output[3] + dim2 * stride_output[2] + dim1 * stride_output[1] );
                for( vx_size dim0 = 0; dim0 < dims[0]; dim0++ )
                {
                    // no saturation done here
                    vx_int16 ivalue = ibuf[dim0];
                    vx_int16 ovalue = (vx_int16)( cosf( (vx_float32)ivalue * input_to_float_multiplier ) * output_to_int16_multiplier + 0.5f );
                    obuf[dim0] = ovalue;
                }
            }
        }
    }

    // Use vxUnmapTensorPatch API to give the data buffers control back to OpenVX framework.
    ERROR_CHECK_STATUS( vxUnmapTensorPatch( input, map_input ) );
    ERROR_CHECK_STATUS( vxUnmapTensorPatch( output, map_output ) );

    return VX_SUCCESS;
}
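////////
// A minimal standalone sketch (illustrative, not part of the exercise
// sources) of the Qm.n fixed-point arithmetic that
// tensor_cos_host_side_function relies on: an INT16 value with fixed-point
// position n represents value / 2^n, so a Q10.5 input is decoded by dividing
// by 32 and a Q8.7 output is encoded by multiplying by 128 and rounding.
// The function name and the example values below are assumptions chosen for
// illustration only.
//
// #include <math.h>
// #include <stdint.h>
//
// static int16_t cos_q_sketch( int16_t in, uint8_t in_fpp, uint8_t out_fpp )
// {
//     float x = (float)in / (float)( 1 << in_fpp );  // decode Qm.in_fpp -> float
//     float y = cosf( x );                           // element-wise operation
//     // encode float -> Qm.out_fpp; +0.5f rounds to nearest for positive y,
//     // matching the behavior of the kernel above
//     return (int16_t)( y * (float)( 1 << out_fpp ) + 0.5f );
// }
//
// Example: cos_q_sketch( 32, 5, 7 ) returns 69, since 32 in Q10.5 is 1.0 and
// cos(1.0) ~= 0.5403 ~= 69/128 in Q8.7.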
////////
// main() has all the OpenVX application code for this exercise.
// Command-line usage:
//   % solution_exercise3 [<video-sequence>|<camera-device-number>]
// When neither video sequence nor camera device number is specified,
// it defaults to the video sequence in "PETS09-S1-L1-View001.avi".
int main( int argc, char * argv[] )
{
    // Get default video sequence when nothing is specified on the command-line and
    // instantiate the OpenCV GUI module for reading input RGB images and displaying
    // the image with OpenVX results.
    const char * video_sequence = argv[1];
    CGuiModule gui( video_sequence );

    // Try to grab the first video frame from the sequence using cv::VideoCapture
    // and check if a video frame is available.
    if( !gui.Grab() )
    {
        printf( "ERROR: input has no video\n" );
        return 1;
    }

    ////////
    // Set the application configuration parameters. Note that the input video
    // sequence is an 8-bit RGB image with dimensions given by gui.GetWidth()
    // and gui.GetHeight(). The parameters for the tensors are:
    //   tensor_dims                   - 3 dimensions of tensor [<width> x <height> x 3]
    //   tensor_input_fixed_point_pos  - fixed-point position for input tensor
    //   tensor_output_fixed_point_pos - fixed-point position for output tensor
    vx_uint32 width  = gui.GetWidth();
    vx_uint32 height = gui.GetHeight();
    vx_size tensor_dims[3] = { width, height, 3 };  // 3 channels (RGB)
    vx_uint8 tensor_input_fixed_point_pos  = 5;     // Q10.5: input [-128..127] maps to [-4..3.96875]
    vx_uint8 tensor_output_fixed_point_pos = 7;     // Q8.7:  output [-1..1] maps to [-128..128]

    ////////
    // Create the OpenVX context, make sure the returned context is valid, and
    // register log_callback to receive messages from the OpenVX framework.
    vx_context context = vxCreateContext();
    ERROR_CHECK_OBJECT( context );
    vxRegisterLogCallback( context, log_callback, vx_false_e );

    ////////
    // Register user kernels with the context.
    //
    // TODO:********
    // 1. Register user kernel with context by calling your implementation of "registerUserKernel()".
    ERROR_CHECK_STATUS( registerUserKernel( context ) );

    ////////
    // Create OpenVX tensor objects for input and output.
    //
    // TODO:********
    // 1. Create tensor objects using tensor_dims, tensor_input_fixed_point_pos, and
    //    tensor_output_fixed_point_pos.
    vx_tensor input_tensor  = vxCreateTensor( context, 3, tensor_dims, VX_TYPE_INT16, tensor_input_fixed_point_pos );
    vx_tensor output_tensor = vxCreateTensor( context, 3, tensor_dims, VX_TYPE_INT16, tensor_output_fixed_point_pos );
    ERROR_CHECK_OBJECT( input_tensor );
    ERROR_CHECK_OBJECT( output_tensor );

    ////////
    // Create, build, and verify the graph with the user kernel node.
    //
    // TODO:********
    // 1. Build a graph with just one node created using userTensorCosNode().
    vx_graph graph = vxCreateGraph( context );
    ERROR_CHECK_OBJECT( graph );
    vx_node cos_node = userTensorCosNode( graph, input_tensor, output_tensor );
    ERROR_CHECK_OBJECT( cos_node );
    ERROR_CHECK_STATUS( vxReleaseNode( &cos_node ) );
    ERROR_CHECK_STATUS( vxVerifyGraph( graph ) );

    ////////
    // Process the video sequence frame by frame until the end of the sequence or abort.
    cv::Mat bgrMatForOutputDisplay( height, width, CV_8UC3 );
    for( int frame_index = 0; !gui.AbortRequested(); frame_index++ )
    {
        ////////
        // Copy input RGB frame from OpenCV into input_tensor with UINT8 to Q10.5 (INT16)
        // conversion, using the vxMapTensorPatch API (see "vx_ext_amd.h").
        //
        // TODO:********
        // 1. Use vxMapTensorPatch API for access to input tensor object for writing
        // 2. Copy UINT8 data from OpenCV RGB image to tensor object
        // 3. Use vxUnmapTensorPatch API to return control of buffer back to framework
        vx_uint8 * cv_rgb_image_buffer = gui.GetBuffer();
        vx_size rgb_stride = gui.GetStride();
        vx_size zeros[3] = { 0 };
        vx_size tensor_stride[3];
        vx_map_id map_id;
        vx_uint8 * buf;
        ERROR_CHECK_STATUS( vxMapTensorPatch( input_tensor, 3, zeros, tensor_dims,
                                              &map_id, tensor_stride, (void **)&buf,
                                              VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );
        for( vx_size c = 0; c < 3; c++ )
        {
            for( vx_size y = 0; y < height; y++ )
            {
                const vx_uint8 * img = cv_rgb_image_buffer + y * rgb_stride + c;
                vx_int16 * inp = (vx_int16 *)( buf + y * tensor_stride[1] + c * tensor_stride[2] );
                for( vx_size x = 0; x < width; x++ )
                {
                    // convert 0..255 to Q10.5 fixed-point format in the [-4..3.96875] range
                    inp[x] = (vx_int16)img[x * 3] - 128;
                }
            }
        }
        ERROR_CHECK_STATUS( vxUnmapTensorPatch( input_tensor, map_id ) );

        ////////
        // Now that the input tensor is ready, just run the graph.
        //
        // TODO:********
        // 1. Call vxProcessGraph to execute the tensor_cos kernel in graph
        ERROR_CHECK_STATUS( vxProcessGraph( graph ) );

        ////////
        // Display the output tensor object as an RGB image. The output tensor
        // is only read here, so map it with VX_READ_ONLY usage.
        //
        // TODO:********
        // 1. Use vxMapTensorPatch API for access to output tensor object for reading
        // 2. Copy tensor object data into OpenCV RGB image
        // 3. Use vxUnmapTensorPatch API to return control of buffer back to framework
        ERROR_CHECK_STATUS( vxMapTensorPatch( output_tensor, 3, zeros, tensor_dims,
                                              &map_id, tensor_stride, (void **)&buf,
                                              VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0 ) );
        vx_uint8 * cv_bgr_image_buffer = bgrMatForOutputDisplay.data;
        vx_size bgr_stride = bgrMatForOutputDisplay.step;
        for( vx_size c = 0; c < 3; c++ )
        {
            for( vx_size y = 0; y < height; y++ )
            {
                const vx_int16 * out = (const vx_int16 *)( buf + y * tensor_stride[1] + c * tensor_stride[2] );
                vx_uint8 * img = cv_bgr_image_buffer + y * bgr_stride + ( 2 - c ); // (2 - c) for RGB to BGR conversion
                for( vx_size x = 0; x < width; x++ )
                {
                    // scale and convert Q8.7 fixed-point format in the [-1..1] range to 0..255 with saturation
                    vx_int16 value = out[x] + 128;
                    value = value > 255 ? 255 : value; // saturation needed
                    img[x * 3] = (vx_uint8)value;
                }
            }
        }
#if ENABLE_DISPLAY
        cv::imshow( "Cosine", bgrMatForOutputDisplay );
#endif
        ERROR_CHECK_STATUS( vxUnmapTensorPatch( output_tensor, map_id ) );

        ////////
        // Display the results and grab the next input RGB frame for the next iteration.
        char text[128];
        sprintf( text, "Keyboard ESC/Q-Quit SPACE-Pause [FRAME %d] [fixed_point_pos input:%d output:%d]",
                 frame_index, tensor_input_fixed_point_pos, tensor_output_fixed_point_pos );
        gui.DrawText( 0, 16, text );
        gui.Show();
        if( !gui.Grab() )
        {
            // Terminate the processing loop if the end of sequence is detected.
            gui.WaitForKey();
            break;
        }
    }

    ////////
    // To release an OpenVX object, call the vxRelease<Object> API with a pointer to the object.
    // If the release operation is successful, the OpenVX framework resets the object to NULL.
    //
    // TODO:********
    // 1. Release graph and tensor objects
    ERROR_CHECK_STATUS( vxReleaseGraph( &graph ) );
    ERROR_CHECK_STATUS( vxReleaseTensor( &input_tensor ) );
    ERROR_CHECK_STATUS( vxReleaseTensor( &output_tensor ) );
    ERROR_CHECK_STATUS( vxReleaseContext( &context ) );
    return 0;
}
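////////
// main() above calls registerUserKernel() and userTensorCosNode(), which the
// exercise defines elsewhere. The following is a minimal sketch of what
// userTensorCosNode() could look like, using only standard OpenVX APIs; the
// kernel enum USER_KERNEL_TENSOR_COS is an assumed name, and the exercise's
// actual definition may differ.
//
// vx_node userTensorCosNode( vx_graph graph, vx_tensor input, vx_tensor output )
// {
//     // Look up the previously registered user kernel by its enum and
//     // instantiate a generic node from it.
//     vx_context context = vxGetContext( (vx_reference)graph );
//     vx_kernel kernel   = vxGetKernelByEnum( context, USER_KERNEL_TENSOR_COS );
//     ERROR_CHECK_OBJECT( kernel );
//     vx_node node = vxCreateGenericNode( graph, kernel );
//     ERROR_CHECK_OBJECT( node );
//     // Bind the two tensor parameters in the order the kernel declared them.
//     ERROR_CHECK_STATUS( vxSetParameterByIndex( node, 0, (vx_reference)input ) );
//     ERROR_CHECK_STATUS( vxSetParameterByIndex( node, 1, (vx_reference)output ) );
//     ERROR_CHECK_STATUS( vxReleaseKernel( &kernel ) );
//     return node;
// }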
int CVxParamTensor::CompareFrame(int frameNumber)
{
    // check if there is no user request to compare
    if (m_fileNameCompare.length() < 1)
        return 0;

    // read reference data from the compare file
    char fileName[MAX_FILE_NAME_LENGTH];
    sprintf(fileName, m_fileNameCompare.c_str(), frameNumber);
    if (!_stricmp(fileName + strlen(fileName) - 4, ".dat")) {
        ReportError("ERROR: read from .dat files not supported: %s\n", fileName);
    }
    FILE * fp = fopen(fileName, m_compareFileIsBinary ? "rb" : "r");
    if (!fp) {
        ReportError("ERROR: Unable to open: %s\n", fileName);
    }
    if (fread(m_data, 1, m_size, fp) != m_size)
        ReportError("ERROR: not enough data (%d bytes) in %s\n", (vx_uint32)m_size, fileName);
    fclose(fp);

    // map the tensor and compare it against the reference data
    vx_map_id map_id;
    vx_size stride[MAX_TENSOR_DIMENSIONS];
    vx_uint8 * ptr;
    vx_status status = vxMapTensorPatch(m_tensor, m_num_of_dims, nullptr, nullptr,
                                        &map_id, stride, (void **)&ptr,
                                        VX_READ_ONLY, VX_MEMORY_TYPE_HOST, 0);
    if (status != VX_SUCCESS)
        ReportError("ERROR: vxMapTensorPatch: read failed (%d)\n", status);

    bool mismatchDetected = false;
    if (m_data_type == VX_TYPE_INT16) {
        vx_int32 maxError = 0;
        vx_int64 sumError = 0;
        for (vx_size d3 = 0; d3 < m_dims[3]; d3++) {
            for (vx_size d2 = 0; d2 < m_dims[2]; d2++) {
                for (vx_size d1 = 0; d1 < m_dims[1]; d1++) {
                    vx_size roffset = m_stride[3] * d3 + m_stride[2] * d2 + m_stride[1] * d1;
                    vx_size doffset = stride[3] * d3 + stride[2] * d2 + stride[1] * d1;
                    const vx_int16 * buf1 = (const vx_int16 *)(((vx_uint8 *)ptr) + doffset);
                    const vx_int16 * buf2 = (const vx_int16 *)(m_data + roffset);
                    for (vx_size d0 = 0; d0 < m_dims[0]; d0++) {
                        vx_int32 v1 = buf1[d0];
                        vx_int32 v2 = buf2[d0];
                        vx_int32 d = v1 - v2;
                        d = (d < 0) ? -d : d;
                        maxError = (d > maxError) ? d : maxError;
                        sumError += d * d;
                    }
                }
            }
        }
        vx_size count = m_dims[0] * m_dims[1] * m_dims[2] * m_dims[3];
        float avgError = (float)sumError / (float)count;
        mismatchDetected = true;
        if (((float)maxError <= m_maxErrorLimit) && (avgError <= m_avgErrorLimit))
            mismatchDetected = false;
        if (mismatchDetected)
            printf("ERROR: tensor COMPARE MISMATCHED [max-err: %d] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
        else if (m_verbose)
            printf("OK: tensor COMPARE MATCHED [max-err: %d] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
    }
    else if (m_data_type == VX_TYPE_FLOAT32) {
        vx_float32 maxError = 0;
        vx_float64 sumError = 0;
        for (vx_size d3 = 0; d3 < m_dims[3]; d3++) {
            for (vx_size d2 = 0; d2 < m_dims[2]; d2++) {
                for (vx_size d1 = 0; d1 < m_dims[1]; d1++) {
                    vx_size roffset = m_stride[3] * d3 + m_stride[2] * d2 + m_stride[1] * d1;
                    vx_size doffset = stride[3] * d3 + stride[2] * d2 + stride[1] * d1;
                    const vx_float32 * buf1 = (const vx_float32 *)(((vx_uint8 *)ptr) + doffset);
                    const vx_float32 * buf2 = (const vx_float32 *)(m_data + roffset);
                    for (vx_size d0 = 0; d0 < m_dims[0]; d0++) {
                        vx_float32 v1 = buf1[d0];
                        vx_float32 v2 = buf2[d0];
                        vx_float32 d = v1 - v2;
                        d = (d < 0) ? -d : d;
                        maxError = (d > maxError) ? d : maxError;
                        sumError += d * d;
                    }
                }
            }
        }
        vx_size count = m_dims[0] * m_dims[1] * m_dims[2] * m_dims[3];
        float avgError = (float)sumError / (float)count;
        mismatchDetected = true;
        if ((maxError <= m_maxErrorLimit) && (avgError <= m_avgErrorLimit))
            mismatchDetected = false;
        if (mismatchDetected)
            printf("ERROR: tensor COMPARE MISMATCHED [max-err: %.6f] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
        else if (m_verbose)
            printf("OK: tensor COMPARE MATCHED [max-err: %.6f] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
    }
    else if (m_data_type == VX_TYPE_FLOAT16) {
        vx_float32 maxError = 0;
        vx_float64 sumError = 0;
        for (vx_size d3 = 0; d3 < m_dims[3]; d3++) {
            for (vx_size d2 = 0; d2 < m_dims[2]; d2++) {
                for (vx_size d1 = 0; d1 < m_dims[1]; d1++) {
                    vx_size roffset = m_stride[3] * d3 + m_stride[2] * d2 + m_stride[1] * d1;
                    vx_size doffset = stride[3] * d3 + stride[2] * d2 + stride[1] * d1;
                    const vx_uint16 * buf1 = (const vx_uint16 *)(((vx_uint8 *)ptr) + doffset);
                    const vx_uint16 * buf2 = (const vx_uint16 *)(m_data + roffset);
                    for (vx_size d0 = 0; d0 < m_dims[0]; d0++) {
                        vx_uint16 h1 = buf1[d0];
                        vx_uint16 h2 = buf2[d0];
                        // expand FP16 bits to FP32 bits (normal values only):
                        // move the sign bit, rebase the exponent from bias 15
                        // to bias 127, and shift the mantissa into place
                        vx_uint32 f1 = ((vx_uint32)(h1 & 0x8000) << 16) | (((h1 & 0x7c00) + 0x1c000) << 13) | ((h1 & 0x03ff) << 13);
                        vx_uint32 f2 = ((vx_uint32)(h2 & 0x8000) << 16) | (((h2 & 0x7c00) + 0x1c000) << 13) | ((h2 & 0x03ff) << 13);
                        vx_float32 v1 = *(float *)&f1;
                        vx_float32 v2 = *(float *)&f2;
                        vx_float32 d = v1 - v2;
                        d = (d < 0) ? -d : d;
                        maxError = (d > maxError) ? d : maxError;
                        sumError += d * d;
                    }
                }
            }
        }
        vx_size count = m_dims[0] * m_dims[1] * m_dims[2] * m_dims[3];
        float avgError = (float)sumError / (float)count;
        mismatchDetected = true;
        if ((maxError <= m_maxErrorLimit) && (avgError <= m_avgErrorLimit))
            mismatchDetected = false;
        if (mismatchDetected)
            printf("ERROR: tensor COMPARE MISMATCHED [max-err: %.6f] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
        else if (m_verbose)
            printf("OK: tensor COMPARE MATCHED [max-err: %.6f] [avg-err: %.6f] for %s with frame#%d of %s\n",
                   maxError, avgError, GetVxObjectName(), frameNumber, fileName);
    }
    else {
        // byte-exact comparison for all other data types
        for (vx_size d3 = 0; d3 < m_dims[3]; d3++) {
            for (vx_size d2 = 0; d2 < m_dims[2]; d2++) {
                for (vx_size d1 = 0; d1 < m_dims[1]; d1++) {
                    vx_size roffset = m_stride[3] * d3 + m_stride[2] * d2 + m_stride[1] * d1;
                    vx_size doffset = stride[3] * d3 + stride[2] * d2 + stride[1] * d1;
                    if (memcmp(((vx_uint8 *)ptr) + doffset, m_data + roffset, stride[0] * m_dims[0]) != 0) {
                        mismatchDetected = true;
                        break;
                    }
                }
                if (mismatchDetected) break;
            }
            if (mismatchDetected) break;
        }
        if (mismatchDetected)
            printf("ERROR: tensor COMPARE MISMATCHED for %s with frame#%d of %s\n",
                   GetVxObjectName(), frameNumber, fileName);
        else if (m_verbose)
            printf("OK: tensor COMPARE MATCHED for %s with frame#%d of %s\n",
                   GetVxObjectName(), frameNumber, fileName);
    }
    status = vxUnmapTensorPatch(m_tensor, map_id);
    if (status != VX_SUCCESS)
        ReportError("ERROR: vxUnmapTensorPatch: read failed (%d)\n", status);

    // report error if mismatched
    if (mismatchDetected) {
        m_compareCountMismatches++;
        if (!m_discardCompareErrors)
            return -1;
    }
    else {
        m_compareCountMatches++;
    }
    return 0;
}
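////////
// A standalone sketch (illustrative, not part of the runvx sources) of the
// half-to-float bit expansion used in the VX_TYPE_FLOAT16 branch above: the
// sign moves from bit 15 to bit 31, the 5-bit exponent is rebased from bias
// 15 to bias 127 (adding 112 = 0x70 before shifting into bits 30:23), and
// the 10-bit mantissa shifts into the top of the 23-bit float mantissa.
// Subnormals, infinities, and NaNs are not handled, matching the code above.
// memcpy is used here for the bit reinterpretation to avoid the
// strict-aliasing issue in the pointer-cast form.
//
// #include <stdint.h>
// #include <string.h>
//
// static float half_to_float_sketch(uint16_t h)
// {
//     uint32_t bits = ((uint32_t)(h & 0x8000u) << 16)        // sign
//                   | (((h & 0x7c00u) + 0x1c000u) << 13)     // exponent, rebased 15 -> 127
//                   | ((h & 0x03ffu) << 13);                 // mantissa
//     float f;
//     memcpy(&f, &bits, sizeof(f));
//     return f;
// }
//
// Example: 0x3c00 (1.0 in FP16) -> 1.0f; 0xc000 (-2.0 in FP16) -> -2.0f.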