static bool ult_perform_test( uint_least32_t num_output_feature_maps, uint_least32_t num_input_feature_maps, uint_least32_t input_feature_map_width, uint_least32_t input_feature_map_height, uint_least32_t kernel_width, uint_least32_t kernel_height, uint_least32_t kernel_stride_x, uint_least32_t kernel_stride_y, uint_least32_t pool_stride_x, uint_least32_t pool_stride_y, uint_least32_t pool_size_x, uint_least32_t pool_size_y, uint8_t accumulator_fraction, uint8_t output_fraction, uint_least32_t center_x, uint_least32_t center_y, NN_ACTIVATION_FUNCTION activation, NN_POOLING_MODE mode) { uint32_t IFMBlock = 16; uint32_t OFMOutBlock = 16; nn_workload_item* work_item = nullptr; nn_workload_item* work_items[12]; nn_workload_item* input_item = nullptr; nn_workload_item* input_items[12]; std::fill_n(work_items, 12, nullptr); bool passed = false; int16_t* input = 0; int16_t* output = 0; int32_t* biases = 0; int16_t* kernel = 0; int16_t* input_ref = 0; int16_t* output_ref = 0; int32_t* biases_ref = 0; int16_t* kernel_ref = 0; uint32_t NoWItems = 1; uint_least32_t output_feature_map_width = (((input_feature_map_width - kernel_width) / kernel_stride_x + 1) - pool_size_x) / pool_stride_x + 1; uint_least32_t output_feature_map_height = (((input_feature_map_height - kernel_height) / kernel_stride_y + 1) - pool_size_y) / pool_stride_y + 1; uint_least32_t output_feature_map_width_int = (input_feature_map_width - kernel_width) / kernel_stride_x + 1; uint_least32_t output_feature_map_height_int = (input_feature_map_height - kernel_height) / kernel_stride_y + 1; num_output_feature_maps += (C_simd_width - (num_output_feature_maps % C_simd_width)) % C_simd_width; // Allocate naive and optimized buffers. ult_nn_convolution_fixedpoint_comp_both_alloc( input, output, biases, kernel, input_ref, output_ref, biases_ref, kernel_ref, num_output_feature_maps, num_input_feature_maps, output_feature_map_width, output_feature_map_height, input_feature_map_width, input_feature_map_height, kernel_width, kernel_height, center_x, center_y); // Initialize both buffers. ult_nn_convolution_fixedpoint_comp_both_initialize_matrices( input, output, biases, kernel, input_ref, output_ref, biases_ref, kernel_ref, num_output_feature_maps, num_input_feature_maps, output_feature_map_width, output_feature_map_height, input_feature_map_width, input_feature_map_height, kernel_width, kernel_height, center_x, center_y); // Naive maxpooling. ult_nn_maxpooling_naive_pooling_int16_fixedpoint( input_ref, output_ref, biases_ref, kernel_ref, num_output_feature_maps, num_input_feature_maps, output_feature_map_width, output_feature_map_height, output_feature_map_width_int, output_feature_map_height_int, input_feature_map_width, input_feature_map_height, kernel_width, kernel_height, kernel_stride_x, kernel_stride_y, pool_size_x, pool_size_y, pool_stride_x, pool_stride_y, accumulator_fraction, output_fraction, center_x, center_y, activation); nn_workflow_t *workflow = nullptr; nn_device_description_t device_description; nn_device_interface_0_t device_interface_0; test_setup(device_description, device_interface_0); // shorter name for function calls nn_device_interface_0_t &di = device_interface_0; nn_workflow_item_t *workflow_input = nullptr , *workflow_output = nullptr , *workflow_convolution = nullptr; fill_workflow( &workflow, &di, &workflow_input, &workflow_output, &workflow_convolution, biases, kernel, num_input_feature_maps, num_output_feature_maps, input_feature_map_width, input_feature_map_height, kernel_width, kernel_height, kernel_stride_x, kernel_stride_y, center_x, center_y, accumulator_fraction, output_fraction, activation); // attaching input/output to workflow workflow->input[0] = workflow_input; workflow->output[0] = workflow_output; nn::data<int16_t, 3>* input_datas[] = { new nn::data<int16_t, 3>((int16_t *)input, num_input_feature_maps, input_feature_map_width, input_feature_map_height ) }; nn::data<int16_t, 3>* output_datas[] = { new nn::data<int16_t, 3>((int16_t *)output, num_output_feature_maps, output_feature_map_width, output_feature_map_height) }; if (num_input_feature_maps == 4) IFMBlock = 4; // compile workflow NN_API_STATUS status; nn_workload_t *workload; NN_WORKLOAD_DATA_TYPE io_format = NN_WORKLOAD_DATA_TYPE_I16_ZXY; EXPECT_EQ(NN_API_STATUS_OK, di.workflow_compile_function(&workload, di.device, workflow, &io_format, &io_format, 1)); EXPECT_EQ(NN_API_STATUS_OK, di.workload_execute_function(workload, (void **)input_datas, (void **)output_datas, &status)); // delete workload EXPECT_EQ(NN_API_STATUS_OK, di.workload_delete_function(workload)); // delete workflow items EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_output)); EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_convolution)); EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_input)); // delete workflow EXPECT_EQ(NN_API_STATUS_OK, di.workflow_delete_function(workflow)); test_teardown(device_description, device_interface_0); // Basic check between optimized and naive versions. passed = ult_nn_convolution_fixedpoint_comp_check_outputs( output_datas[0], output_ref, num_output_feature_maps, output_feature_map_width, output_feature_map_height, center_x, center_y); ult_nn_convolution_fixedpoint_comp_both_dealloc( input, output, biases, kernel, input_ref, output_ref, biases_ref, kernel_ref); return passed; }
static bool ult_perform_test( uint32_t batch_size, uint32_t feature_map_width, uint32_t feature_map_height, uint32_t num_feature_maps, float coeff_alpha, float coeff_beta, float coeff_k, uint32_t input_fraction, uint32_t output_fraction, NN_NORMALIZATION_MODE mode ) { bool return_value = true; bool passed = true; int16_t* input = 0; int16_t* output = 0; int16_t* input_ref = 0; int16_t* output_ref = 0; // Allocate naive and optimized buffers. ult_lrn_comp_buffers_alloc( input, output, input_ref, output_ref, num_feature_maps, feature_map_width, feature_map_height, batch_size ); // Initialize both buffers. ult_lrn_comp_buffers_initialize( input, output, input_ref, output_ref, num_feature_maps, feature_map_width, feature_map_height, batch_size ); // Naive normalization_lrn ult_nn_lrn_fp_comp_naive( input_ref, output_ref, num_feature_maps, feature_map_width, feature_map_height, batch_size, input_fraction, output_fraction, coeff_alpha, coeff_beta, coeff_k ); nn_workflow_t *workflow = nullptr; nn_device_description_t device_description; nn_device_interface_0_t device_interface_0; test_setup(device_description, device_interface_0); // shorter name for function calls nn_device_interface_0_t &di = device_interface_0; nn_workflow_item_t *workflow_input = nullptr , *workflow_output = nullptr , *workflow_normalization = nullptr; fill_workflow( &workflow, &di, &workflow_input, &workflow_output, &workflow_normalization, num_feature_maps, feature_map_width, feature_map_height, batch_size, input_fraction, output_fraction, coeff_alpha, coeff_beta, coeff_k); // attaching input/output to workflow workflow->input[0] = workflow_input; workflow->output[0] = workflow_output; nn::data<int16_t, 3>* input_datas[] = { new nn::data<int16_t, 3>((int16_t *)input, num_feature_maps, feature_map_width, feature_map_height) }; nn::data<int16_t, 3>* output_datas[] = { new nn::data<int16_t, 3>((int16_t *)output, num_feature_maps, feature_map_width, feature_map_height) }; // compile workflow NN_API_STATUS status; nn_workload_t *workload; NN_WORKLOAD_DATA_TYPE io_format = NN_WORKLOAD_DATA_TYPE_I16_ZXY; EXPECT_EQ(NN_API_STATUS_OK, di.workflow_compile_function(&workload, di.device, workflow, &io_format, &io_format, 1)); EXPECT_EQ(NN_API_STATUS_OK, di.workload_execute_function(workload, (void **)input_datas, (void **)output_datas, &status)); // delete workload EXPECT_EQ(NN_API_STATUS_OK, di.workload_delete_function(workload)); // delete workflow items EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_output)); EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_normalization)); EXPECT_EQ(NN_API_STATUS_OK, di.workflow_item_delete_function(workflow_input)); // delete workflow EXPECT_EQ(NN_API_STATUS_OK, di.workflow_delete_function(workflow)); test_teardown(device_description, device_interface_0); //Basic check between optimized and naive versions. passed = ult_nn_lrn_fp_check_outputs( output_datas[0], output_ref, num_feature_maps, feature_map_width, feature_map_height, batch_size); ult_nn_lrn_fp_both_dealloc( input, output, input_ref, output_ref); return passed; }