// Releases every entry of workflow_layer and then the workflow itself
// through the device interface.
// Throws std::runtime_error(error_) when is_valid() reports false.
void cleanup()
{
    const bool usable = is_valid();
    if( !usable )
    {
        throw std::runtime_error( error_ );
    }

    // Work items are released first, the workflow last.
    for( auto layer_item : workflow_layer )
    {
        di->workflow_item_delete_function( layer_item );
    }

    di->workflow_delete_function( workflow );
}
// Releases the workflow items, the workflow, and finally every non-null
// entry of workflow_layer_factor.
// Throws std::runtime_error(error_) when is_valid() reports false.
void cleanup()
{
    const bool usable = is_valid();
    if(!usable)
    {
        throw std::runtime_error(error_);
    }

    // Device-side objects: items first, then the workflow.
    for(auto layer_item : workflow_layer)
    {
        di->workflow_item_delete_function(layer_item);
    }
    di->workflow_delete_function(workflow);

    // Factor buffers: empty slots are skipped.
    for(auto factor : workflow_layer_factor)
    {
        if(factor == nullptr)
        {
            continue;
        }
        delete factor;
    }
}
void cleanup(){ if(!is_valid()) throw std::runtime_error(error_); /* ****************************************************************************************** */ /* Cleanup in memory */ /* ****************************************************************************************** */ std::cout << "Cleanup in memory" << std::endl << "========================================================" << std::endl; di->workflow_item_delete_function(wrkflwi_input); di->workflow_item_delete_function(wrkflwi_stage_1_conv); di->workflow_item_delete_function(wrkflwi_stage_1_pool); di->workflow_item_delete_function(wrkflwi_stage_1_subv); di->workflow_item_delete_function(wrkflwi_stage_2_conv); di->workflow_item_delete_function(wrkflwi_stage_2_pool); di->workflow_item_delete_function(wrkflwi_stage_3_fc); di->workflow_item_delete_function(wrkflwi_stage_4_fc); di->workflow_item_delete_function(wrkflwi_softmax); di->workflow_item_delete_function(wrkflwi_output); di->workflow_delete_function(workflow); delete nnwrkld_conv1_weights; delete nnwrkld_conv1_biases; delete nnwrkld_conv2_weights; delete nnwrkld_conv2_biases; delete nnwrkld_fc1_weights; delete nnwrkld_fc1_biases; delete nnwrkld_fc2_weights; delete nnwrkld_fc2_biases; delete di; }
// Builds a three-stage test workflow (input -> ReLU -> output) on the given
// device interface and returns it.
//
// _di  device interface used to create the workflow and its items; it is
//      also stored in this->di.
// Returns the workflow created through _di, with input[0] and output[0]
// bound to the first and last stage.
// Throws std::runtime_error(error_) when is_valid() reports false.
virtual nn_workflow_t *init_test_workflow( nn_device_interface_0_t *_di )
{
    if(!is_valid()) throw std::runtime_error( error_ );

    // Reset every slot before building. The loop variable must be a
    // reference: iterating by value only nulls a temporary copy and leaves
    // the stored pointers untouched.
    for(auto &wi : workflow_layer) wi = nullptr;

    this->di = _di;

    di->workflow_create_function( &workflow, 1, 1 );

    // ------------------------------------------------------------------------------------------
    // STAGE 0 (input)
    {
        di->workflow_item_create_function( &workflow_layer[input], 0, nullptr, 1 );

        workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
        workflow_layer[input]->arguments.input.index = 0;
        workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[input]->output_format[0].format_1d = { { relu_length } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 1 relu
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[input], 0 };
        di->workflow_item_create_function( &workflow_layer[relu], 1, &inputs_descriptor, 1 );

        workflow_layer[relu]->type = NN_WORK_ITEM_TYPE_RELU;
        workflow_layer[relu]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[relu]->output_format[0].format_1d = { { relu_length } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 2 output
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[relu], 0 };
        di->workflow_item_create_function( &workflow_layer[output], 1, &inputs_descriptor, 1 );

        workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;
        workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
        // NOTE(review): the format tag is 1D but the extent is written through
        // format_3d, unlike the two stages above which use format_1d. Left
        // as-is; confirm against the nn_output_format declaration whether
        // format_1d was intended.
        workflow_layer[output]->output_format[0].format_3d = { { relu_length } };
    }

    // -------------------------------------------------------------------------------------------
    // END of workflow stages definition
    workflow->input[0] = workflow_layer[input];
    workflow->output[0] = workflow_layer[output];
    // -------------------------------------------------------------------------------------------

    return workflow;
}
void cleanup() { if(!is_valid()) throw std::runtime_error(error_); for(auto wl : workflow_layer) di->workflow_item_delete_function(wl); di->workflow_delete_function(workflow); for(auto wlwf : workflow_layer_weights_float) if(wlwf!=nullptr) delete wlwf; for(auto wlwi : workflow_layer_weights_int16) if(wlwi!=nullptr) delete wlwi; for(auto wlbi : workflow_layer_biases_int32) if(wlbi!=nullptr) delete wlbi; for(auto wlbf : workflow_layer_biases_float) if(wlbf!=nullptr) delete wlbf; if(mean_factor!=nullptr) delete mean_factor; }
// Loads the LeNet weights/biases from "weights_lenet/*.nn" and builds the
// workflow: input -> conv1 -> pool1 -> view -> conv2 -> pool2 -> fc1 -> fc2
// -> softmax -> output.
//
// di  device interface used to create the workflow and its items; it is also
//     stored in this->di.
// Returns the workflow member. If any weight/bias file fails to load, a
// message is written to std::cerr and the workflow member is returned as it
// was on entry, without any stage being created.
// Throws std::runtime_error(error_) when is_valid() reports false.
virtual nn_workflow_t *init_workflow(nn_device_interface_0_t *di){

    if(!is_valid()) throw std::runtime_error(error_);

    this->di = di;

    std::cout
        << "--------------------------------------------------------"
        << std::endl
        << "Loading weights and biases" << std::endl << std::endl;

    // Load weights and biases.
    // A failed load must raise a real exception object: a bare `throw;` with
    // no exception in flight calls std::terminate, so the catch block below
    // would never be reached.
    auto load_biases_or_weights = [](std::string wb_file_name)
    {
        nn::data<float> *wb_pointer = nn_data_load_from_file_time_measure(wb_file_name);
        if(wb_pointer == nullptr)
        {
            std::cerr << "Can't load " << wb_file_name << std::endl;
            throw std::runtime_error("Can't load " + wb_file_name);
        }
        return wb_pointer;
    };

    try {
        nnwrkld_conv1_weights = load_biases_or_weights("weights_lenet/conv1.nn");
        nnwrkld_conv1_biases  = load_biases_or_weights("weights_lenet/conv1_bias.nn");
        nnwrkld_conv2_weights = load_biases_or_weights("weights_lenet/conv2.nn");
        nnwrkld_conv2_biases  = load_biases_or_weights("weights_lenet/conv2_bias.nn");
        nnwrkld_fc1_weights   = load_biases_or_weights("weights_lenet/ip1.nn");
        nnwrkld_fc1_biases    = load_biases_or_weights("weights_lenet/ip1_bias.nn");
        nnwrkld_fc2_weights   = load_biases_or_weights("weights_lenet/ip2.nn");
        nnwrkld_fc2_biases    = load_biases_or_weights("weights_lenet/ip2_bias.nn");
    }
    catch(...)
    {
        // Loading failed: give up before touching the device.
        // NOTE(review): `workflow` has not been created at this point;
        // presumably it is null here - confirm against the member's initializer.
        return workflow;
    }

    std::cout
        << "--------------------------------------------------------" << std::endl
        << "Build of workflow" << std::endl;

    di->workflow_create_function(&workflow, 1, 1);

    // ------------------------------------------------------------------------------------------
    // STAGE 0 (input)
    //         output: img_size x img_size
    {
        di->workflow_item_create_function(&wrkflwi_input, 0, nullptr, 1);

        wrkflwi_input->type = NN_WORK_ITEM_TYPE_INPUT;
        wrkflwi_input->arguments.input.index = 0;
        wrkflwi_input->output_format[0].format = NN_DATA_FORMAT_2D;
        // NOTE(review): the format tag is 2D but the extents are written
        // through format_3d. Left as-is; confirm against the nn_output_format
        // declaration whether format_2d was intended.
        wrkflwi_input->output_format[0].format_3d ={ { img_size, img_size} };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 01
    //           convo: 5x5 stride 1x1; no-activation; output: 24x24x20
    //         maxpool: 2x2 stride 2x2;
    //          output: 12x12x20
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_input, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_conv, 1, &inputs_descriptor, 1);

        wrkflwi_stage_1_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        wrkflwi_stage_1_conv->name = "c1";

        wrkflwi_stage_1_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        wrkflwi_stage_1_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;

        // We have weights and biases for 20 filters, but we need four more
        // filters, so the data is extended by padding.
        // NOTE(review): the padded objects returned here are not stored in any
        // member visible in this function - confirm who releases them.
        wrkflwi_stage_1_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv1_weights,1,24);
        wrkflwi_stage_1_conv->arguments.forward_convolution.biases = nn_data_extend_biases_by_padding(nnwrkld_conv1_biases,24);

        wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[0] = 0;
        wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[1] = 0;

        wrkflwi_stage_1_conv->arguments.forward_convolution.stride[0] = 1;
        wrkflwi_stage_1_conv->arguments.forward_convolution.stride[1] = 1;

        wrkflwi_stage_1_conv->output_format[0].format = NN_DATA_FORMAT_3D;

        // It should be 20 output FM, but only an output FM count divisible by 8 is supported.
        wrkflwi_stage_1_conv->output_format[0].format_3d ={ { 24, 24, 24 } };
    }
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_conv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_pool, 1, &inputs_descriptor, 1);

        wrkflwi_stage_1_pool->type = NN_WORK_ITEM_TYPE_POOLING;
        wrkflwi_stage_1_pool->name = "p1";

        wrkflwi_stage_1_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
        wrkflwi_stage_1_pool->arguments.forward_pooling.size[0] = 2;
        wrkflwi_stage_1_pool->arguments.forward_pooling.size[1] = 2;
        wrkflwi_stage_1_pool->arguments.forward_pooling.stride[0] = 2;
        wrkflwi_stage_1_pool->arguments.forward_pooling.stride[1] = 2;

        wrkflwi_stage_1_pool->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_1_pool->output_format[0].format_3d ={ { 12, 12, 24 } };
    }
    // view: keeps the first 20 of the 24 padded feature maps
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_pool, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_subv, 1, &inputs_descriptor, 1); // view

        wrkflwi_stage_1_subv->type = NN_WORK_ITEM_TYPE_VIEW;
        wrkflwi_stage_1_subv->arguments.view.origin[0] = 0;
        wrkflwi_stage_1_subv->arguments.view.origin[1] = 0;
        wrkflwi_stage_1_subv->arguments.view.origin[2] = 0;

        wrkflwi_stage_1_subv->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_1_subv->output_format[0].format_3d ={ { 12, 12, 20 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 02
    //           convo: 5x5 stride 1x1; no-activation; output: 8x8x50
    //         maxpool: 2x2 stride 2x2;
    //          output: 4x4x50

    // convolution 2
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_subv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_2_conv, 1, &inputs_descriptor, 1);

        wrkflwi_stage_2_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        wrkflwi_stage_2_conv->name = "c2";

        wrkflwi_stage_2_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;
        wrkflwi_stage_2_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

        // NOTE(review): as in stage 1, the padded objects are only referenced
        // from the work item - confirm who releases them.
        wrkflwi_stage_2_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv2_weights,20,56);
        wrkflwi_stage_2_conv->arguments.forward_convolution.biases = nn_data_extend_biases_by_padding(nnwrkld_conv2_biases,56);

        wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[0] = 0;
        wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[1] = 0;

        wrkflwi_stage_2_conv->arguments.forward_convolution.stride[0] = 1;
        wrkflwi_stage_2_conv->arguments.forward_convolution.stride[1] = 1;

        wrkflwi_stage_2_conv->output_format[0].format = NN_DATA_FORMAT_3D;

        // It should be 50 output FM, but only an output FM count divisible by 8 is supported.
        wrkflwi_stage_2_conv->output_format[0].format_3d ={ { 8, 8, 56 } };
    }

    // maxpool: 2x2 stride 2x2;
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_conv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_2_pool, 1, &inputs_descriptor, 1); // pooling

        wrkflwi_stage_2_pool->type = NN_WORK_ITEM_TYPE_POOLING;
        wrkflwi_stage_2_pool->name = "p2";

        wrkflwi_stage_2_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

        wrkflwi_stage_2_pool->arguments.forward_pooling.size[0] = 2;
        wrkflwi_stage_2_pool->arguments.forward_pooling.size[1] = 2;

        wrkflwi_stage_2_pool->arguments.forward_pooling.stride[0] = 2;
        wrkflwi_stage_2_pool->arguments.forward_pooling.stride[1] = 2;

        wrkflwi_stage_2_pool->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_2_pool->output_format[0].format_3d ={ { 4, 4, 56 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 03
    //            full: ReLU
    //          output: 500
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_pool, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_3_fc, 1, &inputs_descriptor, 1);

        wrkflwi_stage_3_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        wrkflwi_stage_3_fc->name = "fc1";

        wrkflwi_stage_3_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

        // Weights taken from caffe are in 2D format, while 4D format is needed here.
        nn::data<float>* nnwrkld_fc1_converted_weights = nn_data_convert_weights_2D_to_4D(nnwrkld_fc1_weights, 4, 4, 50, nnwrkld_fc1_weights->size[1]);

        // Release the original weights.
        delete nnwrkld_fc1_weights;

        // Extend the depth of the FC weights to match the padded (56-deep) input.
        nnwrkld_fc1_weights = nn_data_extend_weights_by_padding(nnwrkld_fc1_converted_weights,56,nnwrkld_fc1_converted_weights->size[3]);
        delete nnwrkld_fc1_converted_weights;
        nnwrkld_fc1_converted_weights = nullptr;

        wrkflwi_stage_3_fc->arguments.forward_fully_connected.weights = nnwrkld_fc1_weights;
        wrkflwi_stage_3_fc->arguments.forward_fully_connected.biases = nnwrkld_fc1_biases;

        wrkflwi_stage_3_fc->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_stage_3_fc->output_format[0].format_1d ={ { 500 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 04
    //            full: no activation
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_3_fc, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_4_fc, 1, &inputs_descriptor, 1);

        wrkflwi_stage_4_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        wrkflwi_stage_4_fc->name = "fc2";

        wrkflwi_stage_4_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;

        wrkflwi_stage_4_fc->arguments.forward_fully_connected.weights = nnwrkld_fc2_weights;
        wrkflwi_stage_4_fc->arguments.forward_fully_connected.biases = nnwrkld_fc2_biases;

        wrkflwi_stage_4_fc->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_stage_4_fc->output_format[0].format_1d ={ { 10 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 05 (softmax)
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_4_fc, 0 };
        di->workflow_item_create_function(&wrkflwi_softmax, 1, &inputs_descriptor, 1);

        wrkflwi_softmax->type = NN_WORK_ITEM_TYPE_SOFTMAX;

        wrkflwi_softmax->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_softmax->output_format[0].format_1d ={ { 10 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 6 (output)
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_softmax, 0 };
        di->workflow_item_create_function(&wrkflwi_output, 1, &inputs_descriptor, 1);

        wrkflwi_output->type = NN_WORK_ITEM_TYPE_OUTPUT;

        wrkflwi_output->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_output->output_format[0].format_1d ={ { 10 } };
    }

    // -------------------------------------------------------------------------------------------
    // END of workflow stages definition
    // -------------------------------------------------------------------------------------------
    workflow->input[0] = wrkflwi_input;
    workflow->output[0] = wrkflwi_output;
    // -------------------------------------------------------------------------------------------

    return workflow;
}
virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) { if(!is_valid()) throw std::runtime_error(error_); this->di = _di; // load nn:data factors (weights and biases) for successive layers mean_factor = nn_data_load_from_file("weights_caffenet/imagenet_mean.nnd"); workflow_layer_weights_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1.nnd"); workflow_layer_biases_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1_bias.nnd"); workflow_layer_weights_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_g1.nnd"); workflow_layer_biases_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g1.nnd"); workflow_layer_weights_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_g2.nnd"); workflow_layer_biases_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g2.nnd"); workflow_layer_weights_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3.nnd"); workflow_layer_biases_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3_bias.nnd"); workflow_layer_weights_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_g1.nnd"); workflow_layer_biases_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g1.nnd"); workflow_layer_weights_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_g2.nnd"); workflow_layer_biases_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g2.nnd"); workflow_layer_weights_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_g1.nnd"); workflow_layer_biases_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g1.nnd"); workflow_layer_weights_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_g2.nnd"); workflow_layer_biases_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g2.nnd"); workflow_layer_weights_float[fc6_factor] 
= nn_data_load_from_file("weights_caffenet/fc6.nnd"); workflow_layer_biases_float[fc6_factor] = nn_data_load_from_file("weights_caffenet/fc6_bias.nnd"); workflow_layer_weights_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7.nnd"); workflow_layer_biases_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7_bias.nnd"); workflow_layer_weights_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8.nnd"); workflow_layer_biases_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8_bias.nnd"); for (auto wlwf : workflow_layer_weights_float) if (wlwf == nullptr) throw std::runtime_error("error: one or more of file with weights was not loaded"); for (auto wlbf : workflow_layer_biases_float) if (wlbf == nullptr) throw std::runtime_error("error: one or more of file with biases was not loaded"); di->workflow_create_function(&workflow,1,1); // { c1 c2_1 c2_2 c3 c4_1 c4_2 c5_1 c5_2 fc6 fc7 fc8 } const size_t nnwrkld_accumulator_fraction[last_factor+1] = { 16, 19, 17, 22, 22, 22, 23, 22, 24, 26, 24 }; const size_t nnwrkld_output_fraction[last_factor+1] = { 3, 7, 7, 6, 7, 7, 8, 8, 10, 12, 26 }; const size_t nnwrkld_weights_float_fraction[last_factor+1] = { 16, 16, 14, 15, 16, 16, 16, 15, 16, 16, 12 }; const size_t nnwrkld_biases_float_fraction[last_factor+1] = {nnwrkld_accumulator_fraction[conv1_factor], nnwrkld_accumulator_fraction[conv2_1_factor], nnwrkld_accumulator_fraction[conv2_2_factor], nnwrkld_accumulator_fraction[conv3_factor], nnwrkld_accumulator_fraction[conv4_1_factor], nnwrkld_accumulator_fraction[conv4_2_factor], nnwrkld_accumulator_fraction[conv5_1_factor], nnwrkld_accumulator_fraction[conv5_2_factor], nnwrkld_accumulator_fraction[fc6_factor], nnwrkld_accumulator_fraction[fc7_factor], nnwrkld_accumulator_fraction[fc8_factor] }; for(auto i = 0; i<=last_factor;++i) { workflow_layer_weights_int16[i] = new nn::data<int16_t>(static_cast<const 
size_t*>(workflow_layer_weights_float[i]->size),workflow_layer_weights_float[i]->dimension); workflow_layer_biases_int32[i] = new nn::data<int32_t>(static_cast<const size_t*>(workflow_layer_biases_float[i]->size),workflow_layer_biases_float[i]->dimension); nn_data_convert_float_to_int16_fixedpoint(workflow_layer_weights_float[i],workflow_layer_weights_int16[i],1 << nnwrkld_weights_float_fraction[i]); nn_data_convert_float_to_int32_fixedpoint(workflow_layer_biases_float[i],workflow_layer_biases_int32[i],1 << nnwrkld_biases_float_fraction[i]); } // ------------------------------------------------------------------------------------------ // STAGE 0 (input) // output: 227x227x3 { di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1); workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT; workflow_layer[input]->arguments.input.index = 0; workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 (imagenet_mean_subtract) // output: 227x227x3 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0}; di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1); workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC; workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = mean_factor; workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION; workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 Convert float to int16 // { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[mean_substract], 0 }; 
di->workflow_item_create_function(&workflow_layer[convert], 1, &inputs_descriptor, 1); workflow_layer[convert]->type = NN_WORK_ITEM_TYPE_CONVERT_FLOAT_TO_INT16_FIXEDPOINT; workflow_layer[convert]->arguments.forward_convert_float_to_int16_fixedpoint.output_fraction = 0; workflow_layer[convert]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[convert]->output_format[0].format_3d = nn_output_format_3d{ { img_size, img_size, 4 } }; } // ------------------------------------------------------------------------------------------ // STAGE 01 // convo: 11x11 stride 4x4; ReLU; output: 55x55x96 // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 27x27x96 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[convert], 0 }; di->workflow_item_create_function(&workflow_layer[conv1], 1, &inputs_descriptor, 1); workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv1]->name = "c1"; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv1_factor]; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv1_factor]; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 0; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 0; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 4; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 4; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv1_factor]; 
workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv1_factor]; workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv1]->output_format[0].format_3d = { { 55, 55, 96 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv1], 0 }; di->workflow_item_create_function(&workflow_layer[pool1], 1, &inputs_descriptor, 1); workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT; workflow_layer[pool1]->name = "p1"; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2; workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool1]->output_format[0].format_3d = { { 27, 27, 96 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool1], 0 }; di->workflow_item_create_function(&workflow_layer[norm1], 1, &inputs_descriptor, 1); workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN; workflow_layer[norm1]->name = "lrn1"; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.k = 1; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.n = 5; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv1_factor]; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv1_factor]; 
workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm1]->output_format[0].format_3d = { { 27, 27, 96 } }; } // ------------------------------------------------------------------------------------------ // STAGE 02 // split: 2 (z-axis 96/2); output 27x27x(2*96/2) // convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 }; di->workflow_item_create_function(&workflow_layer[subv1_1], 1, &inputs_descriptor, 1); // view g1 workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_1]->arguments.view.origin[0] = 0; workflow_layer[subv1_1]->arguments.view.origin[1] = 0; workflow_layer[subv1_1]->arguments.view.origin[2] = 0; workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_1]->output_format[0].format_3d = { { 27, 27, 96 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 }; di->workflow_item_create_function(&workflow_layer[subv1_2], 1, &inputs_descriptor, 1); // view g2 workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_2]->arguments.view.origin[0] = 0; workflow_layer[subv1_2]->arguments.view.origin[1] = 0; workflow_layer[subv1_2]->arguments.view.origin[2] = (96 / 2); workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_2]->output_format[0].format_3d = { { 27, 27, 96 / 2 } }; } // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv2_1], 1, &inputs_descriptor, 1); workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv2_1]->name = "c2g1"; 
workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_1_factor]; workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_1]->output_format[0].format_3d = { { 27, 27, 256 / 2 } }; } // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv2_2], 1, &inputs_descriptor, 1); workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv2_2]->name = "c2g2"; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; 
workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_2]->output_format[0].format_3d = { { 27, 27, 256 / 2 } }; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] = { { workflow_layer[conv2_1], 0 }, { workflow_layer[conv2_2], 0 } }; di->workflow_item_create_function(&workflow_layer[merge2], 2, inputs_descriptor, 1); workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge2]->output_format[0].format_3d = { { 27, 27, 256 } }; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge2], 0 }; di->workflow_item_create_function(&workflow_layer[pool2], 1, &inputs_descriptor, 1); // pooling workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT; workflow_layer[pool2]->name = "p2"; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3; 
workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2; workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool2]->output_format[0].format_3d = { { 13, 13, 256 } }; } //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool2], 0 }; di->workflow_item_create_function(&workflow_layer[norm2], 1, &inputs_descriptor, 1); workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN; workflow_layer[norm2]->name = "lrn2"; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.k = 1; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.n = 5; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm2]->output_format[0].format_3d = { { 13, 13, 256 } }; } // ------------------------------------------------------------------------------------------ // STAGE 03 // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x384 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm2], 0 }; di->workflow_item_create_function(&workflow_layer[conv3], 1, &inputs_descriptor, 1); workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv3]->name = "c3"; 
workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv3_factor]; workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv3]->output_format[0].format_3d = { { 13, 13, 384 } }; } // ------------------------------------------------------------------------------------------ // STAGE 04 // split: 2 (z-axis 384/2) // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x(2*384/2) (continue split to next stage) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 }; di->workflow_item_create_function(&workflow_layer[subv3_1], 1, &inputs_descriptor, 1); // view g1 workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_1]->arguments.view.origin[0] = 0; workflow_layer[subv3_1]->arguments.view.origin[1] = 0; workflow_layer[subv3_1]->arguments.view.origin[2] = 0; workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D; 
workflow_layer[subv3_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 }; di->workflow_item_create_function(&workflow_layer[subv3_2], 1, &inputs_descriptor, 1); // view g2 workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_2]->arguments.view.origin[0] = 0; workflow_layer[subv3_2]->arguments.view.origin[1] = 0; workflow_layer[subv3_2]->arguments.view.origin[2] = 384 / 2; workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv4_1], 1, &inputs_descriptor, 1); // conv g1 workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv4_1]->name = "c4g1"; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_1_factor]; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_1_factor]; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_1_factor]; 
workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_1_factor]; workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv4_2], 1, &inputs_descriptor, 1); // conv g2 workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv4_2]->name = "c4g2"; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_2_factor]; workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } // 
------------------------------------------------------------------------------------------ // STAGE 05 // convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv5_1], 1, &inputs_descriptor, 1); // conv g1 workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv5_1]->name = "c5g1"; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_1_factor]; workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_1]->output_format[0].format_3d = { { 13, 13, 256 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv5_2], 1, 
&inputs_descriptor, 1); // conv g2 workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv5_2]->name = "c5g2"; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_2_factor]; workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_2]->output_format[0].format_3d = { { 13, 13, 256 / 2 } }; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] = {{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}}; di->workflow_item_create_function(&workflow_layer[merge5], 2, inputs_descriptor, 1); workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge5]->output_format[0].format_3d = { { 13, 13, 256 } }; } // maxpool: 3x3 stride 2x2; { 
nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge5], 0 }; di->workflow_item_create_function(&workflow_layer[pool5], 1, &inputs_descriptor, 1); // pooling workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT; workflow_layer[pool5]->name = "p5"; workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3; workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3; workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2; workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2; workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = 16; workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = 8; workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool5]->output_format[0].format_3d = { { 6, 6, 256 } }; } // ------------------------------------------------------------------------------------------ // STAGE 06 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool5], 0 }; di->workflow_item_create_function(&workflow_layer[fc6], 1, &inputs_descriptor, 1); workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN; workflow_layer[fc6]->name = "fc6"; workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc6_factor]; workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc6_factor]; workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc6_factor]; workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = 
nnwrkld_output_fraction[fc6_factor]; workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc6]->output_format[0].format_1d = { { 4096 } }; } // ------------------------------------------------------------------------------------------ // STAGE 07 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc6], 0 }; di->workflow_item_create_function(&workflow_layer[fc7], 1, &inputs_descriptor, 1); workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN; workflow_layer[fc7]->name = "fc7"; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc7_factor]; workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc7]->output_format[0].format_1d = { { 4096 } }; } // ------------------------------------------------------------------------------------------ // STAGE 08 // full: ; // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc7], 0 }; di->workflow_item_create_function(&workflow_layer[fc8], 1, &inputs_descriptor, 1); workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I32QN; workflow_layer[fc8]->name = "fc8"; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_NONE; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.weights = 
workflow_layer_weights_int16[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.biases = workflow_layer_biases_int32[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc8_factor]; workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc8]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------ // STAGE 09 (softmax) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc8], 0 }; di->workflow_item_create_function(&workflow_layer[softmax], 1, &inputs_descriptor, 1); workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX_FIXEDPOINT; workflow_layer[softmax]->arguments.forward_softmax_fixedpoint.input_fraction = nnwrkld_output_fraction[fc8_factor]; workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[softmax]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------ // STAGE 10 (output) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[softmax], 0 }; di->workflow_item_create_function(&workflow_layer[output], 1, &inputs_descriptor, 1); workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT; workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[output]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------- // END of workflow stages definition // ------------------------------------------------------------------------------------------- workflow->input[0] = workflow_layer[input]; workflow->output[0] = 
workflow_layer[output]; // ------------------------------------------------------------------------------------------- return workflow; }
bool test_google_float_workload_cpu_images_classification::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if(!init()) throw std::runtime_error("init() returns false so can't run test"); run_timer.tick(); //start time measurement run_result << std::string("run test with " + current_tested_device->get_device_description()); for(uint32_t batch :{1,8,48}) { C_time_control loop_timer; // compiling workload nn_workload_t *workload = nullptr; NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_ZXY_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; auto status = di->workflow_compile_function(&workload, di->device, workflow, &input_format, &output_format, batch); if(!workload) throw std::runtime_error("workload compilation failed for batch = " + std::to_string(batch) + " status: " + std::to_string(status)); test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string(batch)+") execution of " + test_description; bool local_ok = true; auto images_list_iterator = images_list.begin(); auto images_list_end = images_list.end(); while(images_list_iterator != images_list_end) { auto diff_itr = images_list_end - images_list_iterator < batch ? 
images_list_end - images_list_iterator : batch; std::vector< std::string > batch_images(images_list_iterator,images_list_iterator + diff_itr); images_list_iterator += diff_itr; nn::data< float,4 > *images = nullptr; images = nn_data_load_from_image_list(&batch_images, img_size, image_process, batch, RGB_order); if(images) { nn_data_t *input_array[1] ={images}; nn::data<float, 2> *workload_output = new nn::data<float, 2>(1000, batch); if(workload_output == nullptr) throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch)); nn::data<float> *output_array_cmpl[1] ={nn::data_cast<float,0>(workload_output)}; di->workload_execute_function(workload,reinterpret_cast<void**>(input_array),reinterpret_cast<void**>(output_array_cmpl),&status); float *value_cmpl = reinterpret_cast<float *>(workload_output->buffer); for(auto &image_filename : batch_images) { std::ifstream reference_output_file(image_filename + ".txt", std::ifstream::in); // Comparison with the reference output workload float difference = 0; for(int index = 0; index < 1000; ++index) { std::string reference_value_str; std::getline(reference_output_file,reference_value_str); float reference_value = std::stof(reference_value_str); float delta = value_cmpl[index]-reference_value; difference += abs(delta); } if(difference < threshold_to_pass_test) local_result << std::string("note: " + image_filename + " difference = " + std::to_string(difference)); else { local_result << std::string("error: image file: " + image_filename +" the difference exceeded the allowable threshold for compliance: " + std::to_string(difference) + " > " + std::to_string(threshold_to_pass_test)); local_ok = false; run_ok = false; } reference_output_file.close(); value_cmpl += 1000; } batch_images.clear(); if(images != nullptr) delete images; if(workload_output != nullptr) delete workload_output; } } // batch loop summary: local_result.passed = local_ok; loop_timer.tock(); local_result.time_consumed = 
loop_timer.get_time_diff(); local_result.clocks_consumed = loop_timer.get_clocks_diff(); tests_results << local_result; if(workload != nullptr) di->workload_delete_function(workload); } // END: for(uint32_t batch :{1,8,48}) } catch(std::runtime_error &error) { run_result << "error: " + std::string(error.what()); run_ok = false; } catch(...) { run_result << "error: unknown"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if (!done()) run_ok=false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
bool test_convolution_float_cpu_random::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; std::cout << "-> Testing: " << test_description << std::endl; try { if(!init()) throw std::runtime_error( "init() returns false so can't run test" ); NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; for(uint32_t batch : { 1, 8, 48 }) { bool local_ok = true; test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; C_time_control local_timer; // begin local test uint32_t z = 2, img_size = 227, num_features_map = 8; nn::data<float, 4> *images = new nn::data<float, 4>( img_size, img_size, z, batch ); if(images == nullptr) throw std::runtime_error("Cant't create images nn::data"); nn_data_populate( nn::data_cast<float, 0>(images), 0.0f, 255.0f ); nn::data<float, 4> *images_with_padding = new nn::data<float, 4>( img_size + 2, img_size + 2, z, batch ); if(images_with_padding == nullptr) { delete images; throw std::runtime_error("Cant't create images_with_padding nn::data"); } { // padding for input for naive method nn_data_populate( nn::data_cast<float, 0>(images_with_padding), 0.0f ); for(uint32_t tmp_batch = 0; tmp_batch < batch; ++tmp_batch) for(uint32_t tmp_z = 0; tmp_z < z; ++tmp_z) for(uint32_t y = 0; y < img_size; ++y) for(uint32_t x = 0; x < img_size; ++x) images_with_padding->at( x, y, tmp_z, tmp_batch ) = images->at( x, y, tmp_z, tmp_batch ); } nn_workload_t *workload = nullptr; nn_data_t *input_array[1] = { images }; auto workload_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch ); if(workload_output==nullptr) { delete images; delete images_with_padding; throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch)); } nn::data<float> 
*output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) }; auto naive_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch ); if(naive_output==nullptr) { delete images; delete images_with_padding; delete workload_output; throw std::runtime_error("unable to create naive_output for batch = " +std::to_string(batch)); } auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch ); if(!workload) throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch ) + " status: " + std::to_string( status ) ); test_measurement_result run_result; run_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; // changing order needed //di->workload_execute_function( workload, reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status ); float* biases = nullptr; float* weights = nullptr; { // read biases and weights if(NN_WORK_ITEM_TYPE_CONVOLUTION == workflow->input[0]->use[0].item->type) { auto tmp = reinterpret_cast<nn_arguments_forward_convolution_t*>(&workflow->input[0]->use[0].item->arguments); biases = reinterpret_cast<float*>(tmp->biases->buffer); weights = reinterpret_cast<float*>(tmp->weights->buffer); } } if(nullptr == biases || nullptr == weights) throw std::runtime_error( "reading weight or biases for naive version failed for batch = " + std::to_string( batch ) ); naive_convolv_float_implementation( reinterpret_cast<float*>(images_with_padding->buffer), reinterpret_cast<float*>(naive_output->buffer), biases, weights, batch, num_features_map, z, img_size, img_size, img_size + 2, img_size + 2, 3, 3, 1, 1, NN_ACTIVATION_FUNCTION_RELU ); //local_ok = compare_4d_data( workload_output, naive_output ); local_ok = true; // BLIND TEST // end of local test // summary: local_timer.tock(); local_result.time_consumed = local_timer.get_time_diff(); local_result.clocks_consumed = 
local_timer.get_clocks_diff(); local_result.passed = local_ok; tests_results << local_result; run_ok = run_ok && local_ok; if(workload_output) delete workload_output; if(naive_output) delete naive_output; if(images) delete images; if(images_with_padding) delete images_with_padding; } } catch(std::runtime_error &error) { tests_results << run_result; std::cout << "error: " << error.what() << std::endl; } catch(std::exception &error) { tests_results << run_result; std::cout << "error: " << error.what() << std::endl; } catch(...) { tests_results << run_result; std::cout << "error: unknown" << std::endl; } if(!done()) run_ok = false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) { if(!is_valid()) throw std::runtime_error(error_); for(auto wi : workflow_layer) wi = nullptr; for(auto wb : workflow_layer_factor) wb = nullptr; this->di = _di; // create and populate nn:data factors (weights and biases) for successive layers workflow_layer_factor[mean_factor] = new nn::data<float>(img_size,img_size,3); nn_data_populate(workflow_layer_factor[mean_factor],104.007f,122.679f); workflow_layer_factor[conv1_weights] = new nn::data<float>(11,11,3,96); nn_data_populate(workflow_layer_factor[conv1_weights],-0.374f,0.403f); workflow_layer_factor[conv1_biases] = new nn::data<float>(96); nn_data_populate(workflow_layer_factor[conv1_biases],-0.854f,0.232f); workflow_layer_factor[conv2_1_weights] = new nn::data<float>(5,5,48,128); nn_data_populate(workflow_layer_factor[conv2_1_weights],-0.285f,0.379f); workflow_layer_factor[conv2_1_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv2_1_biases],0.974f,1.034f); workflow_layer_factor[conv2_2_weights] = new nn::data<float>(5,5,48,128); nn_data_populate(workflow_layer_factor[conv2_2_weights],-0.269f,0.416f); workflow_layer_factor[conv2_2_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv2_2_biases],0.958f,1.027f); workflow_layer_factor[conv3_weights] = new nn::data<float>(3,3,256,384); nn_data_populate(workflow_layer_factor[conv3_weights],-0.185f,0.512f); workflow_layer_factor[conv3_biases] = new nn::data<float>(384); nn_data_populate(workflow_layer_factor[conv3_biases],-0.104f,0.093f); workflow_layer_factor[conv4_1_weights] = new nn::data<float>(3,3,192,192); nn_data_populate(workflow_layer_factor[conv4_1_weights],-0.103f,0.322f); workflow_layer_factor[conv4_1_biases] = new nn::data<float>(192); nn_data_populate(workflow_layer_factor[conv4_1_biases],0.844f,1.142f); workflow_layer_factor[conv4_2_weights] = new nn::data<float>(3,3,192,192); 
nn_data_populate(workflow_layer_factor[conv4_2_weights],-0.142f,0.353f); workflow_layer_factor[conv4_2_biases] = new nn::data<float>(192); nn_data_populate(workflow_layer_factor[conv4_2_biases],0.77f,1.219f); workflow_layer_factor[conv5_1_weights] = new nn::data<float>(3,3,192,128); nn_data_populate(workflow_layer_factor[conv5_1_weights],-0.092f,0.254f); workflow_layer_factor[conv5_1_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv5_1_biases],0.723f,1.50f); workflow_layer_factor[conv5_2_weights] = new nn::data<float>(3,3,192,128); nn_data_populate(workflow_layer_factor[conv5_2_weights],-0.133f,0.315f); workflow_layer_factor[conv5_2_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv5_2_biases],0.623f,1.742f); workflow_layer_factor[fc6_weights] = new nn::data<float>(6,6,256,4096); nn_data_populate(workflow_layer_factor[fc6_weights],-0.035f,0.048f); workflow_layer_factor[fc6_biases] = new nn::data<float>(4096); nn_data_populate(workflow_layer_factor[fc6_biases],0.92f,1.057f); workflow_layer_factor[fc7_weights] = new nn::data<float>(4096,4096); nn_data_populate(workflow_layer_factor[fc7_weights],-0.032f,0.052f); workflow_layer_factor[fc7_biases] = new nn::data<float>(4096); nn_data_populate(workflow_layer_factor[fc7_biases],0.741f,1.26f); workflow_layer_factor[fc8_weights] = new nn::data<float>(4096,1000); nn_data_populate(workflow_layer_factor[fc8_weights],-0.045f,0.067f); workflow_layer_factor[fc8_biases] = new nn::data<float>(1000); nn_data_populate(workflow_layer_factor[fc8_biases],-0.351f,0.425f); di->workflow_create_function(&workflow,1,1); // ------------------------------------------------------------------------------------------ // STAGE 0 (input) // output: 227x227x3 { di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1); workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT; workflow_layer[input]->arguments.input.index = 0; workflow_layer[input]->output_format[0].format = 
NN_DATA_FORMAT_3D; workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 (imagenet_mean_subtract) // output: 227x227x3 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0}; di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1); workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC; workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = workflow_layer_factor[mean_factor]; workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION; workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 01 // convo: 11x11 stride 4x4; ReLU; output: 55x55x96 // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 27x27x96 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[mean_substract],0}; di->workflow_item_create_function(&workflow_layer[conv1],1,&inputs_descriptor,1); workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv1]->name = "c1"; workflow_layer[conv1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv1]->arguments.forward_convolution.weights = workflow_layer_factor[conv1_weights]; workflow_layer[conv1]->arguments.forward_convolution.biases = workflow_layer_factor[conv1_biases]; workflow_layer[conv1]->arguments.forward_convolution.center_offset[0] = 0; workflow_layer[conv1]->arguments.forward_convolution.center_offset[1] = 0; workflow_layer[conv1]->arguments.forward_convolution.stride[0] = 4; 
workflow_layer[conv1]->arguments.forward_convolution.stride[1] = 4; workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv1]->output_format[0].format_3d ={{55,55,96}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv1],0}; di->workflow_item_create_function(&workflow_layer[pool1],1,&inputs_descriptor,1); workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool1]->name = "p1"; workflow_layer[pool1]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool1]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool1]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool1]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool1]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool1]->output_format[0].format_3d ={{27,27,96}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool1],0}; di->workflow_item_create_function(&workflow_layer[norm1],1,&inputs_descriptor,1); workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION; workflow_layer[norm1]->name = "lrn1"; workflow_layer[norm1]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS; workflow_layer[norm1]->arguments.forward_normalization.normalization.k = 1; // in Krishevsky's article is 2 workflow_layer[norm1]->arguments.forward_normalization.normalization.n = 5; workflow_layer[norm1]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // in Krishevsky's paper is 1e-4, // but didn't write that sum of the squares // is divided by number of elements (n) workflow_layer[norm1]->arguments.forward_normalization.normalization.beta = 0.75f; workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm1]->output_format[0].format_3d ={{27,27,96}}; } // 
------------------------------------------------------------------------------------------ // STAGE 02 // split: 2 (z-axis 96/2); output 27x27x(2*96/2) // convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0}; di->workflow_item_create_function(&workflow_layer[subv1_1],1,&inputs_descriptor,1); // view g1 workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_1]->arguments.view.origin[0] = 0; workflow_layer[subv1_1]->arguments.view.origin[1] = 0; workflow_layer[subv1_1]->arguments.view.origin[2] = 0; workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_1]->output_format[0].format_3d ={{27,27,96/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0}; di->workflow_item_create_function(&workflow_layer[subv1_2],1,&inputs_descriptor,1); // view g2 workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_2]->arguments.view.origin[0] = 0; workflow_layer[subv1_2]->arguments.view.origin[1] = 0; workflow_layer[subv1_2]->arguments.view.origin[2] = (96/2); workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_2]->output_format[0].format_3d ={{27,27,96/2}}; } // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_1],0}; di->workflow_item_create_function(&workflow_layer[conv2_1],1,&inputs_descriptor,1); workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv2_1]->name = "c2g1"; workflow_layer[conv2_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_1]->arguments.forward_convolution.weights = 
workflow_layer_factor[conv2_1_weights]; workflow_layer[conv2_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_1_biases]; workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[0] = 2; workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[1] = 2; workflow_layer[conv2_1]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv2_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_1]->output_format[0].format_3d ={{27,27,256/2}}; } // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_2],0}; di->workflow_item_create_function(&workflow_layer[conv2_2],1,&inputs_descriptor,1); workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv2_2]->name = "c2g2"; workflow_layer[conv2_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_2_weights]; workflow_layer[conv2_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_2_biases]; workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[0] = 2; workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[1] = 2; workflow_layer[conv2_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv2_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_2]->output_format[0].format_3d ={{27,27,256/2}}; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv2_1],0},{workflow_layer[conv2_2],0}}; di->workflow_item_create_function(&workflow_layer[merge2],2,inputs_descriptor,1); 
workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge2]->output_format[0].format_3d ={{27,27,256}}; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge2],0}; di->workflow_item_create_function(&workflow_layer[pool2],1,&inputs_descriptor,1); // pooling workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool2]->name = "p2"; workflow_layer[pool2]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool2]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool2]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool2]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool2]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool2]->output_format[0].format_3d ={{13,13,256}}; } //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool2],0}; di->workflow_item_create_function(&workflow_layer[norm2],1,&inputs_descriptor,1); workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION; workflow_layer[norm2]->name = "lrn2"; workflow_layer[norm2]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS; workflow_layer[norm2]->arguments.forward_normalization.normalization.k = 1; // | workflow_layer[norm2]->arguments.forward_normalization.normalization.n = 5; // | workflow_layer[norm2]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // > see coment at wrkflwi_stage_1_norm workflow_layer[norm2]->arguments.forward_normalization.normalization.beta = 0.75f; // | workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm2]->output_format[0].format_3d ={{13,13,256}}; } // 
------------------------------------------------------------------------------------------ // STAGE 03 // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x384 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm2],0}; di->workflow_item_create_function(&workflow_layer[conv3],1,&inputs_descriptor,1); workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv3]->name = "c3"; workflow_layer[conv3]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv3]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv3]->arguments.forward_convolution.weights = workflow_layer_factor[conv3_weights]; workflow_layer[conv3]->arguments.forward_convolution.biases = workflow_layer_factor[conv3_biases]; workflow_layer[conv3]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv3]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv3]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv3]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv3]->output_format[0].format_3d ={{13,13,384}}; } // ------------------------------------------------------------------------------------------ // STAGE 04 // split: 2 (z-axis 384/2) // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x(2*384/2) (continue split to next stage) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0}; di->workflow_item_create_function(&workflow_layer[subv3_1],1,&inputs_descriptor,1); // view g1 workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_1]->arguments.view.origin[0] = 0; workflow_layer[subv3_1]->arguments.view.origin[1] = 0; workflow_layer[subv3_1]->arguments.view.origin[2] = 0; workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_1]->output_format[0].format_3d 
={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0}; di->workflow_item_create_function(&workflow_layer[subv3_2],1,&inputs_descriptor,1); // view g2 workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_2]->arguments.view.origin[0] = 0; workflow_layer[subv3_2]->arguments.view.origin[1] = 0; workflow_layer[subv3_2]->arguments.view.origin[2] = 384/2; workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_2]->output_format[0].format_3d ={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_1],0}; di->workflow_item_create_function(&workflow_layer[conv4_1],1,&inputs_descriptor,1); // conv g1 workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv4_1]->name = "c4g1"; workflow_layer[conv4_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights]; workflow_layer[conv4_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_1_biases]; workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_1]->output_format[0].format_3d ={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_2],0}; di->workflow_item_create_function(&workflow_layer[conv4_2],1,&inputs_descriptor,1); // conv g2 workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv4_2]->name = "c4g2"; 
workflow_layer[conv4_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights]; workflow_layer[conv4_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_2_biases]; workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_2]->output_format[0].format_3d ={{13,13,384/2}}; } // ------------------------------------------------------------------------------------------ // STAGE 05 // convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_1],0}; di->workflow_item_create_function(&workflow_layer[conv5_1],1,&inputs_descriptor,1); // conv g1 workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv5_1]->name = "c5g1"; workflow_layer[conv5_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_1_weights]; workflow_layer[conv5_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_1_biases]; workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution.stride[0] = 1; 
workflow_layer[conv5_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_1]->output_format[0].format_3d ={{13,13,256/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_2],0}; di->workflow_item_create_function(&workflow_layer[conv5_2],1,&inputs_descriptor,1); // conv g2 workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv5_2]->name = "c5g2"; workflow_layer[conv5_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_2_weights]; workflow_layer[conv5_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_2_biases]; workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_2]->output_format[0].format_3d ={{13,13,256/2}}; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}}; di->workflow_item_create_function(&workflow_layer[merge5],2,inputs_descriptor,1); workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge5]->output_format[0].format_3d ={{13,13,256}}; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge5],0}; 
di->workflow_item_create_function(&workflow_layer[pool5],1,&inputs_descriptor,1); // pooling workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool5]->name = "p5"; workflow_layer[pool5]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool5]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool5]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool5]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool5]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool5]->output_format[0].format_3d ={{6,6,256}}; } // ------------------------------------------------------------------------------------------ // STAGE 06 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool5],0}; di->workflow_item_create_function(&workflow_layer[fc6],1,&inputs_descriptor,1); workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc6]->name = "fc6"; workflow_layer[fc6]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[fc6]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc6_weights]; workflow_layer[fc6]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc6_biases]; workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc6]->output_format[0].format_1d ={{4096}}; } // ------------------------------------------------------------------------------------------ // STAGE 07 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc6],0}; di->workflow_item_create_function(&workflow_layer[fc7],1,&inputs_descriptor,1); workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc7]->name = "fc7"; workflow_layer[fc7]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU; 
workflow_layer[fc7]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc7_weights]; workflow_layer[fc7]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc7_biases]; workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc7]->output_format[0].format_1d ={{4096}}; } // ------------------------------------------------------------------------------------------ // STAGE 08 // full: ; // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc7],0}; di->workflow_item_create_function(&workflow_layer[fc8],1,&inputs_descriptor,1); workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc8]->name = "fc8"; workflow_layer[fc8]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE; workflow_layer[fc8]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc8_weights]; workflow_layer[fc8]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc8_biases]; workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc8]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------ // STAGE 09 (softmax) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc8],0}; di->workflow_item_create_function(&workflow_layer[softmax],1,&inputs_descriptor,1); workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX; workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[softmax]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------ // STAGE 10 (output) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[softmax],0}; di->workflow_item_create_function(&workflow_layer[output],1,&inputs_descriptor,1); workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT; 
workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[output]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------- // END of workflow stages definition // ------------------------------------------------------------------------------------------- workflow->input[0] = workflow_layer[input]; workflow->output[0] = workflow_layer[output]; // ------------------------------------------------------------------------------------------- return workflow; }
bool run_convolve_test( const nn_device_interface_0_t &di, uint_least32_t num_output_feature_maps, uint_least32_t num_input_feature_maps, uint_least32_t input_feature_map_width, uint_least32_t input_feature_map_height, uint_least32_t kernel_width, uint_least32_t kernel_height, uint_least32_t kernel_stride_x, uint_least32_t kernel_stride_y, uint_least32_t num_batches, NN_ACTIVATION_FUNCTION activation_function ) { // Input generation float *input = nullptr; generate_input_data( input, input_feature_map_width, input_feature_map_height, num_input_feature_maps, num_batches ); // Generate Filter Data float *filters = nullptr; generate_filter_data( filters, kernel_width, kernel_height, num_input_feature_maps, num_output_feature_maps ); uint_least32_t output_width = ( ( input_feature_map_width - kernel_width ) / kernel_stride_x + 1 ); uint_least32_t output_height = ( ( input_feature_map_height - kernel_height ) / kernel_stride_y + 1 ); uint_least32_t output_depth = num_output_feature_maps; // cpu_outputs and gpu_outputs are filled in with biases // so as such biases do not exist as separate entity float init_output_val = 0.0; //No biases in output then output is initialized with zeros float *biases = nullptr; float *cpu_outputs = nullptr; float *gpu_outputs = nullptr; // Biases exists as separate entity (each neuron got it own bias value) init_data( biases, output_width * output_height * output_depth, 1.0f ); init_data( gpu_outputs, output_width * output_height * output_depth * num_batches, 0.0f ); init_data( cpu_outputs, output_width * output_height * output_depth * num_batches, 0.0f ); // Activation function fp_func_activ activ_func = nullptr; switch( activation_function ) { case NN_ACTIVATION_FUNCTION_NONE: activ_func = none; break; case NN_ACTIVATION_FUNCTION_TANH: activ_func = mytanh; break; case NN_ACTIVATION_FUNCTION_RELU: activ_func = relu; break; case NN_ACTIVATION_FUNCTION_SOFTPLUS: activ_func = softplus; break; default: printf( "Error: Not supported activation 
function chosen: %d\n", activation_function ); assert( 0 ); break; } nn_workload_data_coords_t conv_input_view_begin( 0, 0, 0, 0, 0, 0 ); nn_workload_data_coords_t conv_input_view_end( num_batches - 1, input_feature_map_width - 1, input_feature_map_height - 1, num_input_feature_maps - 1, 0, 0 ); nn_workload_data_coords_t conv_output_view_begin( 0, 0, 0, 0, 0, 0 ); nn_workload_data_coords_t conv_output_view_end( num_batches - 1, output_width - 1, output_height - 1, output_depth - 1, 0, 0 ); // Run reference convolving (needed for comparison) convolve_ref( activ_func, cpu_outputs, input, filters, biases, conv_output_view_begin, conv_output_view_end, conv_input_view_begin, conv_input_view_end, output_width, output_height, output_depth, input_feature_map_width, input_feature_map_height, num_input_feature_maps, kernel_width, kernel_height, num_input_feature_maps, kernel_stride_x, kernel_stride_y, 0, // center offset x 0, // center offset y num_batches ); // First workload item is input one (entity producing input data) nn_gpu_workload_item *input_workload_item = nullptr; initialize_input_workload_item( input_workload_item); // Specify layout nn_workload_data_layout_t input_output_weights_layout = { { 0, 0, 0, 0, 0, 0 }, // tile in log2(size) { 0, 0, 0, 0, 0, 0 }, // alignment { NN_DATA_COORD_x, NN_DATA_COORD_y, NN_DATA_COORD_z, NN_DATA_COORD_p, NN_DATA_COORD_n, NN_DATA_COORD_q }, // ordering NN_DATATYPE_FLOAT }; // specify dimensions of input, output and weights nn_workload_data_coords_t input_coords = { num_batches, input_feature_map_width, input_feature_map_height, num_input_feature_maps, 1, 1 }; nn_workload_data_coords_t output_coords = { num_batches, output_width, output_height, num_output_feature_maps, 1, 1 }; nn_workload_data_coords_t weight_coords = { 1, kernel_width, kernel_height, num_input_feature_maps, num_output_feature_maps, 1 }; // Now create convolution workload_item giving as input input_workload_item nn_gpu_workload_item *convolution_workload_item = 
nullptr; initialize_layer_workload_item( convolution_workload_item, input_workload_item, input_output_weights_layout, output_coords); convolution_workload_item->type = NN_WORK_ITEM_TYPE_CONVOLUTION; convolution_workload_item->arguments.forward_convolution.padding = NN_PADDING_MODE_NONE; convolution_workload_item->arguments.forward_convolution.stride[0] = kernel_stride_x; convolution_workload_item->arguments.forward_convolution.stride[1] = kernel_stride_y; convolution_workload_item->arguments.forward_convolution.center_offset[0] = 0; convolution_workload_item->arguments.forward_convolution.center_offset[1] = 0; convolution_workload_item->arguments.forward_convolution.activation.function = activation_function; nn::nn_workload_data_t< float > *weight_data = new nn::nn_workload_data_t< float >( filters, weight_coords, input_output_weights_layout ); convolution_workload_item->arguments.forward_convolution.weights = new nn::nn_workload_data_t< float >( weight_coords, input_output_weights_layout ); nn_workload_data_copy( weight_data, convolution_workload_item->arguments.forward_convolution.weights ); delete weight_data; //release temporary buffers nn_workload_data_coords_t bias_coords = { 1, 1, 1, 1, num_output_feature_maps, 1 }; nn::nn_workload_data_t< float > *bias_data = new nn::nn_workload_data_t< float >(biases, bias_coords, input_output_weights_layout); convolution_workload_item->arguments.forward_convolution.biases = new nn::nn_workload_data_t< float >( bias_coords, input_output_weights_layout ); nn_workload_data_copy( bias_data, convolution_workload_item->arguments.forward_convolution.biases ); delete bias_data; //release temporary buffers // Now create output workload_item giving softmax workload item as precedessor nn_gpu_workload_item *output_workload_item = nullptr; initialize_output_workload_item( output_workload_item, convolution_workload_item ); // Make a workload using two above created workload_items nn_gpu_workload *gpu_workload = nullptr; 
create_workload_using_workload_items( di, gpu_workload, num_batches, NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH, NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH, input_workload_item, convolution_workload_item, output_workload_item ); using io_data = std::unique_ptr<nn::data<float, 0>>; io_data execute_inputs[1]; io_data execute_outputs[1]; // specify dimensions of input, output and weights size_t execution_input_size[4] = {input_feature_map_width, input_feature_map_height, num_input_feature_maps, num_batches}; size_t execution_output_size[4] = {output_width, output_height, num_output_feature_maps, num_batches}; execute_inputs[0] = io_data(new nn::data<float, 0>(input, execution_input_size, 4)); execute_outputs[0] = io_data(new nn::data<float, 0>(gpu_outputs, execution_output_size, 4)); EXPECT_EQ( NN_API_STATUS_OK, di.workload_execute_function( ( nn_workload * )gpu_workload, ( void ** )execute_inputs, ( void ** )execute_outputs, nullptr ) ); EXPECT_EQ( true, verify_output( execute_outputs[0], cpu_outputs ) ); EXPECT_EQ( NN_API_STATUS_OK, di.workload_delete_function(( nn_workload * )gpu_workload)); #ifdef __linux__ free( cpu_outputs ); cpu_outputs = nullptr; free( gpu_outputs ); gpu_outputs = nullptr; free( filters ); filters = nullptr; free( biases ); biases = nullptr; free( input ); input = nullptr; #else _aligned_free( cpu_outputs ); cpu_outputs = nullptr; _aligned_free( gpu_outputs ); gpu_outputs = nullptr; _aligned_free( filters ); filters = nullptr; _aligned_free( biases ); biases = nullptr; _aligned_free( input ); input = nullptr; #endif //__linux__ return true; }
bool run_softmax_test( const nn_device_interface_0_t &di, uint_least32_t num_samples, uint_least32_t num_batches) // length of input to be processed (softmax normalize) { // Input generation (input feature maps to have pooling run on it) float *input = nullptr; generate_input_data( input, num_samples, 1, 1, num_batches ); // length of output is the same as input float *cpu_outputs; init_data( cpu_outputs, num_samples * num_batches, 0.0f ); float *gpu_outputs; init_data( gpu_outputs, num_samples * num_batches, 0.0f ); softmax_ref( cpu_outputs, input, num_samples, num_batches ); // First workload item is input one (entity producing input data) nn_gpu_workload_item *input_workload_item = nullptr; initialize_input_workload_item( input_workload_item); // Specify layout of softmax workload nn_workload_data_layout_t workload_layout = { { 0, 0, 0, 0, 0, 0 }, // tile in log2(size) { 0, 0, 0, 0, 0, 0 }, // alignment { NN_DATA_COORD_x, NN_DATA_COORD_y, NN_DATA_COORD_z, NN_DATA_COORD_p, NN_DATA_COORD_n, NN_DATA_COORD_q }, NN_DATATYPE_FLOAT }; // specify dimensions of input, output nn_workload_data_coords_t workload_coords = { num_batches, num_samples, 1, 1, 1, 1 }; size_t output_coords[2] = {num_samples, num_batches}; // Now create softmax workload_item giving as input input_workload_item nn_gpu_workload_item *softmax_workload_item = nullptr; initialize_layer_workload_item( softmax_workload_item, input_workload_item, workload_layout, workload_coords ); softmax_workload_item->type = NN_WORK_ITEM_TYPE_SOFTMAX; // Now create output workload_item giving softmax workload item as precedessor nn_gpu_workload_item *output_workload_item = nullptr; initialize_output_workload_item( output_workload_item, softmax_workload_item ); // Make a workload using two above created workload_items nn_gpu_workload *gpu_workload = nullptr; create_workload_using_workload_items( di, gpu_workload, num_batches, NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH, NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH, input_workload_item, 
softmax_workload_item, output_workload_item ); using io_data = std::unique_ptr<nn::data<float, 0>>; io_data execute_inputs[1]; io_data execute_outputs[1]; execute_inputs[0] = io_data(new nn::data<float, 0>(input, output_coords, 2)); execute_outputs[0] = io_data(new nn::data<float, 0>(gpu_outputs, output_coords, 2)); EXPECT_EQ( NN_API_STATUS_OK, di.workload_execute_function( ( nn_workload * )gpu_workload, ( void ** )execute_inputs, ( void ** )execute_outputs, nullptr ) ); nn_workload_data_coords_t output_view_begin(0, 0, 0, 0, 0, 0); nn_workload_data_coords_t output_view_end(num_batches - 1, num_samples - 1, 0, 0, 0, 0); // Compare CPU(reference) output with the one returned by GPU EXPECT_EQ( true, verify_output( execute_outputs[0], cpu_outputs ) ); EXPECT_EQ( NN_API_STATUS_OK, di.workload_delete_function(( nn_workload * )gpu_workload)); #ifdef __linux__ free( cpu_outputs ); cpu_outputs = nullptr; free( gpu_outputs ); gpu_outputs = nullptr; free( input ); input = nullptr; #else _aligned_free( cpu_outputs ); cpu_outputs = nullptr; _aligned_free( gpu_outputs ); gpu_outputs = nullptr; _aligned_free( input ); input = nullptr; #endif //__linux__ return true; }
bool test_caffe_float_workload_cpu_time::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if(!init()) throw std::runtime_error("error: init() returns false so can't run test"); run_timer.tick(); //start time measurement run_result << std::string("run test with " + current_tested_device->get_device_description()); // --------------------------------------------------------------------------------------------------------- // TODO: here test code //{ // BKM pattern of test with time measuring: // bool local_ok=true; // test_measurement_result local_result; // local_result.description = "RUN PART: (name part) of " + test_description; // C_time_control local_timer; // // begin local test // // end of local test // // summary: // local_timer.tock(); // local_result.time_consumed = local_timer.time_diff_string(); // local_result.clocks_consumed = local_timer.get_clocks_diff(); // tests_results << local_result; //} // The pattern, of complex instruction above, can be multiplied for(uint16_t batch :{1,8,48}) { std::vector<uint64_t> time_diffs; std::vector<uint64_t> clock_diffs; nn::data<float,4> *images = new nn::data<float,4>(img_size,img_size,3,batch); nn_data_populate(nn::data_cast<float,0>(images),0.0f,255.0f); nn_data_t *input_array[1] ={images}; auto workload_output = new nn::data<float, 2>(1000, batch); nn::data<float> *output_array_cmpl[1] ={ nn::data_cast<float, 0>(workload_output) }; nn_workload_t *workload = nullptr; // compiling workload NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_ZXY_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; auto status = di->workflow_compile_function(&workload,di->device,workflow,&input_format,&output_format,batch); if(!workload) throw std::runtime_error("workload compilation failed for batch = " + std::to_string(batch) 
+ " status: " + std::to_string(status)); test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string(batch)+") execution of " + test_description; local_result.loops = loops; // begin local test for(auto i = 0; i< loops; ++i) { NN_API_STATUS status; C_time_control loop_timer; di->workload_execute_function(workload,reinterpret_cast<void**>(input_array),reinterpret_cast<void**>(output_array_cmpl),&status); loop_timer.tock(); time_diffs.push_back(loop_timer.get_time_diff()/batch); clock_diffs.push_back(loop_timer.get_clocks_diff()/batch); } // end of local test // summary: uint64_t min_value = *std::min_element(time_diffs.begin(),time_diffs.end()); local_result.time_consumed = std::accumulate(time_diffs.begin(),time_diffs.end(),0.0)/time_diffs.size(); local_result.time_consumed_min = min_value; local_result.time_consumed_max = *std::max_element(time_diffs.begin(),time_diffs.end()); local_result << std::string("note: The shortest time for one image obtained from the chrono: " + C_time_control::time_diff_string(min_value)); local_result << std::string("note: Values of time's and clock's were divided by current value of batch: "+std::to_string(batch)); local_result.clocks_consumed = std::accumulate(clock_diffs.begin(),clock_diffs.end(),0.0)/clock_diffs.size(); local_result.clocks_consumed_min = *std::min_element(clock_diffs.begin(),clock_diffs.end()); local_result.clocks_consumed_max = *std::max_element(clock_diffs.begin(),clock_diffs.end()); tests_results << local_result; if(images != nullptr) delete images; if(workload_output != nullptr) delete workload_output; if(workload != nullptr) di->workload_delete_function(workload); } // --------------------------------------------------------------------------------------------------------- run_ok = true; } catch(std::runtime_error &error) { run_result << "error: " + std::string(error.what()); run_ok = false; } catch(...) 
{ run_result << "error: unknown"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if (!done()) run_ok=false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
bool test_softmax_float_cpu_random::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if( !init() ) throw std::runtime_error( "init() returns false so can't run test" ); run_timer.tick(); //start time measurement run_result << std::string( "run test with " + current_tested_device->get_device_description() ); NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; const int softmax_size = 1000; for( auto batch : { 1, 8, 48 } ) { // --------------------------------------------------------------------------------------------------------- { // simple sample pattern of test with time measuring: bool local_ok = true; test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; C_time_control local_timer; // begin local test auto input = new nn::data<float>( softmax_size, batch ); if(input == nullptr) throw std::runtime_error("unable to create input for batch = " +std::to_string(batch)); auto workload_output = new nn::data<float>( softmax_size, batch ); if(workload_output == nullptr) throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch)); nn_data_populate( workload_output, 0.0f ); nn_data_populate( input, 0.0f, 20.0f ); nn_workload_t *workload = nullptr; nn_data_t *input_array[1] = { input }; nn::data<float> *output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) }; auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch ); if( !workload ) throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch ) + " status: " + std::to_string( status ) ); di->workload_execute_function( workload, 
reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status ); auto naive_output = cpu_layer_softmax( input ); local_ok = compare_data(workload_output, naive_output); // end of local test // summary: local_timer.tock(); local_result.time_consumed = local_timer.get_time_diff(); local_result.clocks_consumed = local_timer.get_clocks_diff(); local_result.passed = local_ok; tests_results << local_result; run_ok = run_ok && local_ok; if( input ) delete input; if( workload_output ) delete workload_output; if( naive_output ) delete naive_output; if( workload ) delete workload; } // The pattern, of complex instruction above, can be multiplied // END of run tests // --------------------------------------------------------------------------------------------------------- } } catch( std::runtime_error &error ) { run_result << "error: " + std::string( error.what() ); run_ok = false; } catch( std::exception &error ) { run_result << "error: " + std::string( error.what() ); run_ok = false; } catch( ... ) { run_result << "unknown error"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if( !done() ) run_ok = false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }