// Builds a minimal test workflow: input -> relu -> output, all stages 1D of
// length relu_length. Stores the created items in workflow_layer[] and wires
// workflow input/output. Throws std::runtime_error if the fixture is invalid.
// Returns the newly created workflow (also kept in the `workflow` member).
virtual nn_workflow_t *init_test_workflow( nn_device_interface_0_t *_di ) {

    if(!is_valid()) throw std::runtime_error( error_ );

    // BUG FIX: the original iterated by value (`for(auto wi : ...)`), so the
    // assignment nulled a local copy and left the array untouched.
    // Iterate by reference to actually clear the layer pointers.
    for(auto &wi : workflow_layer) wi = nullptr;

    this->di = _di;

    di->workflow_create_function( &workflow, 1, 1 );

    // STAGE 0 (input)
    {
        di->workflow_item_create_function( &workflow_layer[input], 0, nullptr, 1 );
        workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
        workflow_layer[input]->arguments.input.index = 0;
        workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[input]->output_format[0].format_1d = { { relu_length } };
    }

    // STAGE 1 relu
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[input], 0 };
        di->workflow_item_create_function( &workflow_layer[relu], 1, &inputs_descriptor, 1 );
        workflow_layer[relu]->type = NN_WORK_ITEM_TYPE_RELU;
        workflow_layer[relu]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[relu]->output_format[0].format_1d = { { relu_length } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 2 output
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[relu], 0 };
        di->workflow_item_create_function( &workflow_layer[output], 1, &inputs_descriptor, 1 );
        workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;
        workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
        // BUG FIX: the format is declared 1D, so initialize format_1d
        // (the original wrote to the format_3d union member).
        workflow_layer[output]->output_format[0].format_1d = { { relu_length } };
    }

    // -------------------------------------------------------------------------------------------
    // END of workflow stages definition
    workflow->input[0]  = workflow_layer[input];
    workflow->output[0] = workflow_layer[output];
    // -------------------------------------------------------------------------------------------

    return workflow;
}
// Builds the LeNet inference workflow:
//   input -> conv1 -> pool1 -> view -> conv2 -> pool2 -> fc1(ReLU) -> fc2 -> softmax -> output
// Loads float weights/biases from weights_lenet/*.nn files; convolution filter
// counts are padded (20->24, 50->56) because this implementation requires the
// output feature-map count to be divisible by 8. Throws std::runtime_error if
// the fixture is invalid. On a weight-load failure, returns the `workflow`
// member as-is (NOTE(review): presumably nullptr at that point — callers are
// expected to treat that as failure; confirm against call sites).
virtual nn_workflow_t *init_workflow(nn_device_interface_0_t *di) {

    if(!is_valid()) throw std::runtime_error(error_);

    this->di = di;

    std::cout
        << "--------------------------------------------------------" << std::endl
        << "Loading weights and biases" << std::endl << std::endl;

    // Load weights and biases
    auto load_biases_or_weights = [](std::string wb_file_name) {
        nn::data<float> *wb_pointer = nn_data_load_from_file_time_measure(wb_file_name);
        if(wb_pointer == nullptr) {
            std::cerr << "Can't load " << wb_file_name << std::endl;
            // BUG FIX: a bare `throw;` with no active exception calls
            // std::terminate(), so the catch(...) below could never run.
            // Throw a real exception object instead.
            throw std::runtime_error("Can't load " + wb_file_name);
        }
        return wb_pointer;
    };

    try {
        nnwrkld_conv1_weights = load_biases_or_weights("weights_lenet/conv1.nn");
        nnwrkld_conv1_biases  = load_biases_or_weights("weights_lenet/conv1_bias.nn");
        nnwrkld_conv2_weights = load_biases_or_weights("weights_lenet/conv2.nn");
        nnwrkld_conv2_biases  = load_biases_or_weights("weights_lenet/conv2_bias.nn");
        nnwrkld_fc1_weights   = load_biases_or_weights("weights_lenet/ip1.nn");
        nnwrkld_fc1_biases    = load_biases_or_weights("weights_lenet/ip1_bias.nn");
        nnwrkld_fc2_weights   = load_biases_or_weights("weights_lenet/ip2.nn");
        nnwrkld_fc2_biases    = load_biases_or_weights("weights_lenet/ip2_bias.nn");
    }
    catch(...) {
        return workflow;
    }

    std::cout
        << "--------------------------------------------------------" << std::endl
        << "Build of workflow" << std::endl;

    di->workflow_create_function(&workflow, 1, 1);

    // ------------------------------------------------------------------------------------------
    // STAGE 0 (input)
    //         output: 28x28x3
    {
        di->workflow_item_create_function(&wrkflwi_input, 0, nullptr, 1);
        wrkflwi_input->type = NN_WORK_ITEM_TYPE_INPUT;
        wrkflwi_input->arguments.input.index = 0;
        wrkflwi_input->output_format[0].format = NN_DATA_FORMAT_2D;
        // BUG FIX: the format is declared 2D, so initialize format_2d
        // (the original wrote two values into the format_3d union member).
        wrkflwi_input->output_format[0].format_2d = { { img_size, img_size } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 01
    //           convo: 5x5 stride 1x1; no-activation; output: 24x24x20
    //         maxpool: 2x2 stride 2x2;
    //          output: 12x12x20
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_input, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_conv, 1, &inputs_descriptor, 1);

        wrkflwi_stage_1_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        wrkflwi_stage_1_conv->name = "c1";

        wrkflwi_stage_1_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        wrkflwi_stage_1_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;

        // We have weights, biases for 20 filters, but we want to have four more
        // filters so lets add padding
        wrkflwi_stage_1_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv1_weights,1,24);
        wrkflwi_stage_1_conv->arguments.forward_convolution.biases  = nn_data_extend_biases_by_padding(nnwrkld_conv1_biases,24);

        wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[0] = 0;
        wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[1] = 0;

        wrkflwi_stage_1_conv->arguments.forward_convolution.stride[0] = 1;
        wrkflwi_stage_1_conv->arguments.forward_convolution.stride[1] = 1;

        wrkflwi_stage_1_conv->output_format[0].format = NN_DATA_FORMAT_3D;
        // It should be 20 output FM, but we do support only case when output FM
        // number is divisble by 8
        wrkflwi_stage_1_conv->output_format[0].format_3d = { { 24, 24, 24 } };
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_conv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_pool, 1, &inputs_descriptor, 1);

        wrkflwi_stage_1_pool->type = NN_WORK_ITEM_TYPE_POOLING;
        wrkflwi_stage_1_pool->name = "p1";

        wrkflwi_stage_1_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

        wrkflwi_stage_1_pool->arguments.forward_pooling.size[0] = 2;
        wrkflwi_stage_1_pool->arguments.forward_pooling.size[1] = 2;

        wrkflwi_stage_1_pool->arguments.forward_pooling.stride[0] = 2;
        wrkflwi_stage_1_pool->arguments.forward_pooling.stride[1] = 2;

        wrkflwi_stage_1_pool->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_1_pool->output_format[0].format_3d = { { 12, 12, 24 } };
    }

    // view: crop the padded feature maps back to the 20 real ones
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_pool, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_1_subv, 1, &inputs_descriptor, 1); // view

        wrkflwi_stage_1_subv->type = NN_WORK_ITEM_TYPE_VIEW;
        wrkflwi_stage_1_subv->arguments.view.origin[0] = 0;
        wrkflwi_stage_1_subv->arguments.view.origin[1] = 0;
        wrkflwi_stage_1_subv->arguments.view.origin[2] = 0;

        wrkflwi_stage_1_subv->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_1_subv->output_format[0].format_3d = { { 12, 12, 20 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 02
    //           convo: 5x5 stride 1x1; no-activation; output: 8x8x50
    //         maxpool: 2x2 stride 2x2;
    //          output: 4x4x50

    // convolution 2
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_subv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_2_conv, 1, &inputs_descriptor, 1);

        wrkflwi_stage_2_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        wrkflwi_stage_2_conv->name = "c2";

        wrkflwi_stage_2_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;
        wrkflwi_stage_2_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

        wrkflwi_stage_2_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv2_weights,20,56);
        wrkflwi_stage_2_conv->arguments.forward_convolution.biases  = nn_data_extend_biases_by_padding(nnwrkld_conv2_biases,56);

        wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[0] = 0;
        wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[1] = 0;

        wrkflwi_stage_2_conv->arguments.forward_convolution.stride[0] = 1;
        wrkflwi_stage_2_conv->arguments.forward_convolution.stride[1] = 1;

        wrkflwi_stage_2_conv->output_format[0].format = NN_DATA_FORMAT_3D;
        // It should be 50 output FM, but we do support only case when output FM
        // number is divisble by 8
        wrkflwi_stage_2_conv->output_format[0].format_3d = { { 8, 8, 56 } };
    }

    // maxpool: 2x2 stride 2x2;
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_conv, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_2_pool, 1, &inputs_descriptor, 1); // pooling

        wrkflwi_stage_2_pool->type = NN_WORK_ITEM_TYPE_POOLING;
        wrkflwi_stage_2_pool->name = "p2";

        wrkflwi_stage_2_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

        wrkflwi_stage_2_pool->arguments.forward_pooling.size[0] = 2;
        wrkflwi_stage_2_pool->arguments.forward_pooling.size[1] = 2;

        wrkflwi_stage_2_pool->arguments.forward_pooling.stride[0] = 2;
        wrkflwi_stage_2_pool->arguments.forward_pooling.stride[1] = 2;

        wrkflwi_stage_2_pool->output_format[0].format = NN_DATA_FORMAT_3D;
        wrkflwi_stage_2_pool->output_format[0].format_3d = { { 4, 4, 56 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 03
    //            full: ReLU
    //          output: 500
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_pool, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_3_fc, 1, &inputs_descriptor, 1);

        wrkflwi_stage_3_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        wrkflwi_stage_3_fc->name = "fc1";

        wrkflwi_stage_3_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

        // Generated weights if taken from caffe, are in 2D format while we need
        // them in 4D format
        nn::data<float>* nnwrkld_fc1_converted_weights = nn_data_convert_weights_2D_to_4D(nnwrkld_fc1_weights, 4, 4, 50, nnwrkld_fc1_weights->size[1]);

        // release original weights
        delete nnwrkld_fc1_weights;

        // Extend weights' depth of FC layer to match extended weights input
        nnwrkld_fc1_weights = nn_data_extend_weights_by_padding(nnwrkld_fc1_converted_weights,56,nnwrkld_fc1_converted_weights->size[3]);
        delete nnwrkld_fc1_converted_weights;
        nnwrkld_fc1_converted_weights = nullptr;

        wrkflwi_stage_3_fc->arguments.forward_fully_connected.weights = nnwrkld_fc1_weights;
        wrkflwi_stage_3_fc->arguments.forward_fully_connected.biases  = nnwrkld_fc1_biases;

        wrkflwi_stage_3_fc->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_stage_3_fc->output_format[0].format_1d = { { 500 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 04
    //            full: ;
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_3_fc, 0 };
        di->workflow_item_create_function(&wrkflwi_stage_4_fc, 1, &inputs_descriptor, 1);

        wrkflwi_stage_4_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        wrkflwi_stage_4_fc->name = "fc2";

        wrkflwi_stage_4_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;

        wrkflwi_stage_4_fc->arguments.forward_fully_connected.weights = nnwrkld_fc2_weights;
        wrkflwi_stage_4_fc->arguments.forward_fully_connected.biases  = nnwrkld_fc2_biases;

        wrkflwi_stage_4_fc->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_stage_4_fc->output_format[0].format_1d = { { 10 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 05 (softmax)
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_4_fc, 0 };
        di->workflow_item_create_function(&wrkflwi_softmax, 1, &inputs_descriptor, 1);

        wrkflwi_softmax->type = NN_WORK_ITEM_TYPE_SOFTMAX;

        wrkflwi_softmax->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_softmax->output_format[0].format_1d = { { 10 } };
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 6 (output)
    //          output: 10
    {
        nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_softmax, 0 };
        di->workflow_item_create_function(&wrkflwi_output, 1, &inputs_descriptor, 1);

        wrkflwi_output->type = NN_WORK_ITEM_TYPE_OUTPUT;

        wrkflwi_output->output_format[0].format = NN_DATA_FORMAT_1D;
        wrkflwi_output->output_format[0].format_1d = { { 10 } };
    }

    // -------------------------------------------------------------------------------------------
    // END of workflow stages definition
    // -------------------------------------------------------------------------------------------
    workflow->input[0]  = wrkflwi_input;
    workflow->output[0] = wrkflwi_output;
    // -------------------------------------------------------------------------------------------

    return workflow;
}
virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) { if(!is_valid()) throw std::runtime_error(error_); this->di = _di; // load nn:data factors (weights and biases) for successive layers mean_factor = nn_data_load_from_file("weights_caffenet/imagenet_mean.nnd"); workflow_layer_weights_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1.nnd"); workflow_layer_biases_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1_bias.nnd"); workflow_layer_weights_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_g1.nnd"); workflow_layer_biases_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g1.nnd"); workflow_layer_weights_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_g2.nnd"); workflow_layer_biases_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g2.nnd"); workflow_layer_weights_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3.nnd"); workflow_layer_biases_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3_bias.nnd"); workflow_layer_weights_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_g1.nnd"); workflow_layer_biases_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g1.nnd"); workflow_layer_weights_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_g2.nnd"); workflow_layer_biases_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g2.nnd"); workflow_layer_weights_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_g1.nnd"); workflow_layer_biases_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g1.nnd"); workflow_layer_weights_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_g2.nnd"); workflow_layer_biases_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g2.nnd"); workflow_layer_weights_float[fc6_factor] 
= nn_data_load_from_file("weights_caffenet/fc6.nnd"); workflow_layer_biases_float[fc6_factor] = nn_data_load_from_file("weights_caffenet/fc6_bias.nnd"); workflow_layer_weights_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7.nnd"); workflow_layer_biases_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7_bias.nnd"); workflow_layer_weights_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8.nnd"); workflow_layer_biases_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8_bias.nnd"); for (auto wlwf : workflow_layer_weights_float) if (wlwf == nullptr) throw std::runtime_error("error: one or more of file with weights was not loaded"); for (auto wlbf : workflow_layer_biases_float) if (wlbf == nullptr) throw std::runtime_error("error: one or more of file with biases was not loaded"); di->workflow_create_function(&workflow,1,1); // { c1 c2_1 c2_2 c3 c4_1 c4_2 c5_1 c5_2 fc6 fc7 fc8 } const size_t nnwrkld_accumulator_fraction[last_factor+1] = { 16, 19, 17, 22, 22, 22, 23, 22, 24, 26, 24 }; const size_t nnwrkld_output_fraction[last_factor+1] = { 3, 7, 7, 6, 7, 7, 8, 8, 10, 12, 26 }; const size_t nnwrkld_weights_float_fraction[last_factor+1] = { 16, 16, 14, 15, 16, 16, 16, 15, 16, 16, 12 }; const size_t nnwrkld_biases_float_fraction[last_factor+1] = {nnwrkld_accumulator_fraction[conv1_factor], nnwrkld_accumulator_fraction[conv2_1_factor], nnwrkld_accumulator_fraction[conv2_2_factor], nnwrkld_accumulator_fraction[conv3_factor], nnwrkld_accumulator_fraction[conv4_1_factor], nnwrkld_accumulator_fraction[conv4_2_factor], nnwrkld_accumulator_fraction[conv5_1_factor], nnwrkld_accumulator_fraction[conv5_2_factor], nnwrkld_accumulator_fraction[fc6_factor], nnwrkld_accumulator_fraction[fc7_factor], nnwrkld_accumulator_fraction[fc8_factor] }; for(auto i = 0; i<=last_factor;++i) { workflow_layer_weights_int16[i] = new nn::data<int16_t>(static_cast<const 
size_t*>(workflow_layer_weights_float[i]->size),workflow_layer_weights_float[i]->dimension); workflow_layer_biases_int32[i] = new nn::data<int32_t>(static_cast<const size_t*>(workflow_layer_biases_float[i]->size),workflow_layer_biases_float[i]->dimension); nn_data_convert_float_to_int16_fixedpoint(workflow_layer_weights_float[i],workflow_layer_weights_int16[i],1 << nnwrkld_weights_float_fraction[i]); nn_data_convert_float_to_int32_fixedpoint(workflow_layer_biases_float[i],workflow_layer_biases_int32[i],1 << nnwrkld_biases_float_fraction[i]); } // ------------------------------------------------------------------------------------------ // STAGE 0 (input) // output: 227x227x3 { di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1); workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT; workflow_layer[input]->arguments.input.index = 0; workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 (imagenet_mean_subtract) // output: 227x227x3 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0}; di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1); workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC; workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = mean_factor; workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION; workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 Convert float to int16 // { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[mean_substract], 0 }; 
di->workflow_item_create_function(&workflow_layer[convert], 1, &inputs_descriptor, 1); workflow_layer[convert]->type = NN_WORK_ITEM_TYPE_CONVERT_FLOAT_TO_INT16_FIXEDPOINT; workflow_layer[convert]->arguments.forward_convert_float_to_int16_fixedpoint.output_fraction = 0; workflow_layer[convert]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[convert]->output_format[0].format_3d = nn_output_format_3d{ { img_size, img_size, 4 } }; } // ------------------------------------------------------------------------------------------ // STAGE 01 // convo: 11x11 stride 4x4; ReLU; output: 55x55x96 // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 27x27x96 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[convert], 0 }; di->workflow_item_create_function(&workflow_layer[conv1], 1, &inputs_descriptor, 1); workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv1]->name = "c1"; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv1_factor]; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv1_factor]; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 0; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 0; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 4; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 4; workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv1_factor]; 
workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv1_factor]; workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv1]->output_format[0].format_3d = { { 55, 55, 96 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv1], 0 }; di->workflow_item_create_function(&workflow_layer[pool1], 1, &inputs_descriptor, 1); workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT; workflow_layer[pool1]->name = "p1"; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2; workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2; workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool1]->output_format[0].format_3d = { { 27, 27, 96 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool1], 0 }; di->workflow_item_create_function(&workflow_layer[norm1], 1, &inputs_descriptor, 1); workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN; workflow_layer[norm1]->name = "lrn1"; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.k = 1; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.n = 5; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv1_factor]; workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv1_factor]; 
workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm1]->output_format[0].format_3d = { { 27, 27, 96 } }; } // ------------------------------------------------------------------------------------------ // STAGE 02 // split: 2 (z-axis 96/2); output 27x27x(2*96/2) // convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 }; di->workflow_item_create_function(&workflow_layer[subv1_1], 1, &inputs_descriptor, 1); // view g1 workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_1]->arguments.view.origin[0] = 0; workflow_layer[subv1_1]->arguments.view.origin[1] = 0; workflow_layer[subv1_1]->arguments.view.origin[2] = 0; workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_1]->output_format[0].format_3d = { { 27, 27, 96 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 }; di->workflow_item_create_function(&workflow_layer[subv1_2], 1, &inputs_descriptor, 1); // view g2 workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_2]->arguments.view.origin[0] = 0; workflow_layer[subv1_2]->arguments.view.origin[1] = 0; workflow_layer[subv1_2]->arguments.view.origin[2] = (96 / 2); workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_2]->output_format[0].format_3d = { { 27, 27, 96 / 2 } }; } // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv2_1], 1, &inputs_descriptor, 1); workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv2_1]->name = "c2g1"; 
workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_1_factor]; workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_1_factor]; workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_1]->output_format[0].format_3d = { { 27, 27, 256 / 2 } }; } // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv2_2], 1, &inputs_descriptor, 1); workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv2_2]->name = "c2g2"; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; 
workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_2_factor]; workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_2]->output_format[0].format_3d = { { 27, 27, 256 / 2 } }; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] = { { workflow_layer[conv2_1], 0 }, { workflow_layer[conv2_2], 0 } }; di->workflow_item_create_function(&workflow_layer[merge2], 2, inputs_descriptor, 1); workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge2]->output_format[0].format_3d = { { 27, 27, 256 } }; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge2], 0 }; di->workflow_item_create_function(&workflow_layer[pool2], 1, &inputs_descriptor, 1); // pooling workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT; workflow_layer[pool2]->name = "p2"; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3; 
workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2; workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2; workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool2]->output_format[0].format_3d = { { 13, 13, 256 } }; } //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool2], 0 }; di->workflow_item_create_function(&workflow_layer[norm2], 1, &inputs_descriptor, 1); workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN; workflow_layer[norm2]->name = "lrn2"; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.k = 1; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.n = 5; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv2_2_factor]; workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm2]->output_format[0].format_3d = { { 13, 13, 256 } }; } // ------------------------------------------------------------------------------------------ // STAGE 03 // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x384 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm2], 0 }; di->workflow_item_create_function(&workflow_layer[conv3], 1, &inputs_descriptor, 1); workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv3]->name = "c3"; 
workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv3_factor]; workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv3_factor]; workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv3]->output_format[0].format_3d = { { 13, 13, 384 } }; } // ------------------------------------------------------------------------------------------ // STAGE 04 // split: 2 (z-axis 384/2) // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x(2*384/2) (continue split to next stage) { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 }; di->workflow_item_create_function(&workflow_layer[subv3_1], 1, &inputs_descriptor, 1); // view g1 workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_1]->arguments.view.origin[0] = 0; workflow_layer[subv3_1]->arguments.view.origin[1] = 0; workflow_layer[subv3_1]->arguments.view.origin[2] = 0; workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D; 
workflow_layer[subv3_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 }; di->workflow_item_create_function(&workflow_layer[subv3_2], 1, &inputs_descriptor, 1); // view g2 workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_2]->arguments.view.origin[0] = 0; workflow_layer[subv3_2]->arguments.view.origin[1] = 0; workflow_layer[subv3_2]->arguments.view.origin[2] = 384 / 2; workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv4_1], 1, &inputs_descriptor, 1); // conv g1 workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv4_1]->name = "c4g1"; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_1_factor]; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_1_factor]; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_1_factor]; 
workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_1_factor]; workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv4_2], 1, &inputs_descriptor, 1); // conv g2 workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv4_2]->name = "c4g2"; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_2_factor]; workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_2_factor]; workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } }; } // 
------------------------------------------------------------------------------------------ // STAGE 05 // convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_1], 0 }; di->workflow_item_create_function(&workflow_layer[conv5_1], 1, &inputs_descriptor, 1); // conv g1 workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv5_1]->name = "c5g1"; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_1_factor]; workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_1_factor]; workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_1]->output_format[0].format_3d = { { 13, 13, 256 / 2 } }; } { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_2], 0 }; di->workflow_item_create_function(&workflow_layer[conv5_2], 1, 
&inputs_descriptor, 1); // conv g2 workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT; workflow_layer[conv5_2]->name = "c5g2"; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_2_factor]; workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_2_factor]; workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_2]->output_format[0].format_3d = { { 13, 13, 256 / 2 } }; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] = {{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}}; di->workflow_item_create_function(&workflow_layer[merge5], 2, inputs_descriptor, 1); workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge5]->output_format[0].format_3d = { { 13, 13, 256 } }; } // maxpool: 3x3 stride 2x2; { 
nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge5], 0 };
di->workflow_item_create_function(&workflow_layer[pool5], 1, &inputs_descriptor, 1); // pooling
workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT;
workflow_layer[pool5]->name = "p5";
workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3;
workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3;
workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2;
workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2;
// NOTE(review): the fixed-point fractions below are written through the
// fully_connected_forward_i16qn_i32qn union member even though this item's type
// is MAX_POOLING_INT16_FIXEDPOINT — presumably this relies on the fractions
// fields sharing the same offset across the arguments union; confirm against
// the nn argument struct definitions, otherwise this should go through the
// forward_pooling_fixedpoint member.
workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = 16;
workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = 8;
workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D;
workflow_layer[pool5]->output_format[0].format_3d = { { 6, 6, 256 } };
}
// ------------------------------------------------------------------------------------------
// STAGE 06
// full: ReLU
// output: 4096
{
nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool5], 0 };
di->workflow_item_create_function(&workflow_layer[fc6], 1, &inputs_descriptor, 1);
workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN;
workflow_layer[fc6]->name = "fc6";
workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc6_factor];
workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc6_factor];
// NOTE(review): fractions are set via the i16qn_i32qn union member while the
// item type and the weights/biases above use the i16qn_i16qn member — likely
// layout-compatible aliasing; confirm the two argument structs share this
// field layout.
workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc6_factor];
workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output =
nnwrkld_output_fraction[fc6_factor]; workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc6]->output_format[0].format_1d = { { 4096 } }; } // ------------------------------------------------------------------------------------------ // STAGE 07 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc6], 0 }; di->workflow_item_create_function(&workflow_layer[fc7], 1, &inputs_descriptor, 1); workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN; workflow_layer[fc7]->name = "fc7"; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc7_factor]; workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc7_factor]; workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc7]->output_format[0].format_1d = { { 4096 } }; } // ------------------------------------------------------------------------------------------ // STAGE 08 // full: ; // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc7], 0 }; di->workflow_item_create_function(&workflow_layer[fc8], 1, &inputs_descriptor, 1); workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I32QN; workflow_layer[fc8]->name = "fc8"; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_NONE; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.weights = 
workflow_layer_weights_int16[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.biases = workflow_layer_biases_int32[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc8_factor]; workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc8_factor]; workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc8]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------ // STAGE 09 (softmax) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc8], 0 }; di->workflow_item_create_function(&workflow_layer[softmax], 1, &inputs_descriptor, 1); workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX_FIXEDPOINT; workflow_layer[softmax]->arguments.forward_softmax_fixedpoint.input_fraction = nnwrkld_output_fraction[fc8_factor]; workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[softmax]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------ // STAGE 10 (output) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[softmax], 0 }; di->workflow_item_create_function(&workflow_layer[output], 1, &inputs_descriptor, 1); workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT; workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[output]->output_format[0].format_1d = { { 1000 } }; } // ------------------------------------------------------------------------------------------- // END of workflow stages definition // ------------------------------------------------------------------------------------------- workflow->input[0] = workflow_layer[input]; workflow->output[0] = 
workflow_layer[output]; // ------------------------------------------------------------------------------------------- return workflow; }
virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) {
    // Builds the float (reference) test workflow: allocates nn::data factor
    // buffers filled with synthetic weight/bias values, then defines the layer
    // graph through the device interface. Throws std::runtime_error when this
    // builder is in an invalid state.
    if(!is_valid()) throw std::runtime_error(error_);
    // FIX: iterate by reference. The original `for(auto wi : ...)` assigned
    // nullptr to a *copy* of each element, leaving the arrays untouched.
    for(auto &wi : workflow_layer) wi = nullptr;
    for(auto &wb : workflow_layer_factor) wb = nullptr;
    this->di = _di;
    // Create and populate nn::data factors (weights and biases) for successive layers.
    // NOTE(review): raw `new` with no visible delete here — presumably freed by
    // the owner of workflow_layer_factor; confirm there is no leak on repeated init.
    workflow_layer_factor[mean_factor] = new nn::data<float>(img_size,img_size,3);
    nn_data_populate(workflow_layer_factor[mean_factor],104.007f,122.679f);
    workflow_layer_factor[conv1_weights] = new nn::data<float>(11,11,3,96);
    nn_data_populate(workflow_layer_factor[conv1_weights],-0.374f,0.403f);
    workflow_layer_factor[conv1_biases] = new nn::data<float>(96);
    nn_data_populate(workflow_layer_factor[conv1_biases],-0.854f,0.232f);
    workflow_layer_factor[conv2_1_weights] = new nn::data<float>(5,5,48,128);
    nn_data_populate(workflow_layer_factor[conv2_1_weights],-0.285f,0.379f);
    workflow_layer_factor[conv2_1_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv2_1_biases],0.974f,1.034f);
    workflow_layer_factor[conv2_2_weights] = new nn::data<float>(5,5,48,128);
    nn_data_populate(workflow_layer_factor[conv2_2_weights],-0.269f,0.416f);
    workflow_layer_factor[conv2_2_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv2_2_biases],0.958f,1.027f);
    workflow_layer_factor[conv3_weights] = new nn::data<float>(3,3,256,384);
    nn_data_populate(workflow_layer_factor[conv3_weights],-0.185f,0.512f);
    workflow_layer_factor[conv3_biases] = new nn::data<float>(384);
    nn_data_populate(workflow_layer_factor[conv3_biases],-0.104f,0.093f);
    workflow_layer_factor[conv4_1_weights] = new nn::data<float>(3,3,192,192);
    nn_data_populate(workflow_layer_factor[conv4_1_weights],-0.103f,0.322f);
    workflow_layer_factor[conv4_1_biases] = new nn::data<float>(192);
    nn_data_populate(workflow_layer_factor[conv4_1_biases],0.844f,1.142f);
    workflow_layer_factor[conv4_2_weights] = new nn::data<float>(3,3,192,192);
nn_data_populate(workflow_layer_factor[conv4_2_weights],-0.142f,0.353f); workflow_layer_factor[conv4_2_biases] = new nn::data<float>(192); nn_data_populate(workflow_layer_factor[conv4_2_biases],0.77f,1.219f); workflow_layer_factor[conv5_1_weights] = new nn::data<float>(3,3,192,128); nn_data_populate(workflow_layer_factor[conv5_1_weights],-0.092f,0.254f); workflow_layer_factor[conv5_1_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv5_1_biases],0.723f,1.50f); workflow_layer_factor[conv5_2_weights] = new nn::data<float>(3,3,192,128); nn_data_populate(workflow_layer_factor[conv5_2_weights],-0.133f,0.315f); workflow_layer_factor[conv5_2_biases] = new nn::data<float>(128); nn_data_populate(workflow_layer_factor[conv5_2_biases],0.623f,1.742f); workflow_layer_factor[fc6_weights] = new nn::data<float>(6,6,256,4096); nn_data_populate(workflow_layer_factor[fc6_weights],-0.035f,0.048f); workflow_layer_factor[fc6_biases] = new nn::data<float>(4096); nn_data_populate(workflow_layer_factor[fc6_biases],0.92f,1.057f); workflow_layer_factor[fc7_weights] = new nn::data<float>(4096,4096); nn_data_populate(workflow_layer_factor[fc7_weights],-0.032f,0.052f); workflow_layer_factor[fc7_biases] = new nn::data<float>(4096); nn_data_populate(workflow_layer_factor[fc7_biases],0.741f,1.26f); workflow_layer_factor[fc8_weights] = new nn::data<float>(4096,1000); nn_data_populate(workflow_layer_factor[fc8_weights],-0.045f,0.067f); workflow_layer_factor[fc8_biases] = new nn::data<float>(1000); nn_data_populate(workflow_layer_factor[fc8_biases],-0.351f,0.425f); di->workflow_create_function(&workflow,1,1); // ------------------------------------------------------------------------------------------ // STAGE 0 (input) // output: 227x227x3 { di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1); workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT; workflow_layer[input]->arguments.input.index = 0; workflow_layer[input]->output_format[0].format = 
NN_DATA_FORMAT_3D; workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 0 (imagenet_mean_subtract) // output: 227x227x3 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0}; di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1); workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC; workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = workflow_layer_factor[mean_factor]; workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION; workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}}; } // ------------------------------------------------------------------------------------------ // STAGE 01 // convo: 11x11 stride 4x4; ReLU; output: 55x55x96 // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 27x27x96 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[mean_substract],0}; di->workflow_item_create_function(&workflow_layer[conv1],1,&inputs_descriptor,1); workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv1]->name = "c1"; workflow_layer[conv1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv1]->arguments.forward_convolution.weights = workflow_layer_factor[conv1_weights]; workflow_layer[conv1]->arguments.forward_convolution.biases = workflow_layer_factor[conv1_biases]; workflow_layer[conv1]->arguments.forward_convolution.center_offset[0] = 0; workflow_layer[conv1]->arguments.forward_convolution.center_offset[1] = 0; workflow_layer[conv1]->arguments.forward_convolution.stride[0] = 4; 
workflow_layer[conv1]->arguments.forward_convolution.stride[1] = 4; workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv1]->output_format[0].format_3d ={{55,55,96}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv1],0}; di->workflow_item_create_function(&workflow_layer[pool1],1,&inputs_descriptor,1); workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool1]->name = "p1"; workflow_layer[pool1]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool1]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool1]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool1]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool1]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool1]->output_format[0].format_3d ={{27,27,96}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool1],0}; di->workflow_item_create_function(&workflow_layer[norm1],1,&inputs_descriptor,1); workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION; workflow_layer[norm1]->name = "lrn1"; workflow_layer[norm1]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS; workflow_layer[norm1]->arguments.forward_normalization.normalization.k = 1; // in Krishevsky's article is 2 workflow_layer[norm1]->arguments.forward_normalization.normalization.n = 5; workflow_layer[norm1]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // in Krishevsky's paper is 1e-4, // but didn't write that sum of the squares // is divided by number of elements (n) workflow_layer[norm1]->arguments.forward_normalization.normalization.beta = 0.75f; workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm1]->output_format[0].format_3d ={{27,27,96}}; } // 
------------------------------------------------------------------------------------------ // STAGE 02 // split: 2 (z-axis 96/2); output 27x27x(2*96/2) // convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // norm: RESPONSE_ACROSS_MAPS // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0}; di->workflow_item_create_function(&workflow_layer[subv1_1],1,&inputs_descriptor,1); // view g1 workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_1]->arguments.view.origin[0] = 0; workflow_layer[subv1_1]->arguments.view.origin[1] = 0; workflow_layer[subv1_1]->arguments.view.origin[2] = 0; workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_1]->output_format[0].format_3d ={{27,27,96/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0}; di->workflow_item_create_function(&workflow_layer[subv1_2],1,&inputs_descriptor,1); // view g2 workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv1_2]->arguments.view.origin[0] = 0; workflow_layer[subv1_2]->arguments.view.origin[1] = 0; workflow_layer[subv1_2]->arguments.view.origin[2] = (96/2); workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv1_2]->output_format[0].format_3d ={{27,27,96/2}}; } // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_1],0}; di->workflow_item_create_function(&workflow_layer[conv2_1],1,&inputs_descriptor,1); workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv2_1]->name = "c2g1"; workflow_layer[conv2_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_1]->arguments.forward_convolution.weights = 
workflow_layer_factor[conv2_1_weights]; workflow_layer[conv2_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_1_biases]; workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[0] = 2; workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[1] = 2; workflow_layer[conv2_1]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv2_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_1]->output_format[0].format_3d ={{27,27,256/2}}; } // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_2],0}; di->workflow_item_create_function(&workflow_layer[conv2_2],1,&inputs_descriptor,1); workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv2_2]->name = "c2g2"; workflow_layer[conv2_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv2_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv2_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_2_weights]; workflow_layer[conv2_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_2_biases]; workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[0] = 2; workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[1] = 2; workflow_layer[conv2_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv2_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv2_2]->output_format[0].format_3d ={{27,27,256/2}}; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv2_1],0},{workflow_layer[conv2_2],0}}; di->workflow_item_create_function(&workflow_layer[merge2],2,inputs_descriptor,1); 
workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge2]->output_format[0].format_3d ={{27,27,256}}; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge2],0}; di->workflow_item_create_function(&workflow_layer[pool2],1,&inputs_descriptor,1); // pooling workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool2]->name = "p2"; workflow_layer[pool2]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool2]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool2]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool2]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool2]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool2]->output_format[0].format_3d ={{13,13,256}}; } //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool2],0}; di->workflow_item_create_function(&workflow_layer[norm2],1,&inputs_descriptor,1); workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION; workflow_layer[norm2]->name = "lrn2"; workflow_layer[norm2]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS; workflow_layer[norm2]->arguments.forward_normalization.normalization.k = 1; // | workflow_layer[norm2]->arguments.forward_normalization.normalization.n = 5; // | workflow_layer[norm2]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // > see coment at wrkflwi_stage_1_norm workflow_layer[norm2]->arguments.forward_normalization.normalization.beta = 0.75f; // | workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[norm2]->output_format[0].format_3d ={{13,13,256}}; } // 
------------------------------------------------------------------------------------------ // STAGE 03 // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x384 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm2],0}; di->workflow_item_create_function(&workflow_layer[conv3],1,&inputs_descriptor,1); workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv3]->name = "c3"; workflow_layer[conv3]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv3]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv3]->arguments.forward_convolution.weights = workflow_layer_factor[conv3_weights]; workflow_layer[conv3]->arguments.forward_convolution.biases = workflow_layer_factor[conv3_biases]; workflow_layer[conv3]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv3]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv3]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv3]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv3]->output_format[0].format_3d ={{13,13,384}}; } // ------------------------------------------------------------------------------------------ // STAGE 04 // split: 2 (z-axis 384/2) // convo: 3x3 stride 1x1; ReLU; 0-padded // output: 13x13x(2*384/2) (continue split to next stage) { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0}; di->workflow_item_create_function(&workflow_layer[subv3_1],1,&inputs_descriptor,1); // view g1 workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_1]->arguments.view.origin[0] = 0; workflow_layer[subv3_1]->arguments.view.origin[1] = 0; workflow_layer[subv3_1]->arguments.view.origin[2] = 0; workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_1]->output_format[0].format_3d 
={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0}; di->workflow_item_create_function(&workflow_layer[subv3_2],1,&inputs_descriptor,1); // view g2 workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW; workflow_layer[subv3_2]->arguments.view.origin[0] = 0; workflow_layer[subv3_2]->arguments.view.origin[1] = 0; workflow_layer[subv3_2]->arguments.view.origin[2] = 384/2; workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[subv3_2]->output_format[0].format_3d ={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_1],0}; di->workflow_item_create_function(&workflow_layer[conv4_1],1,&inputs_descriptor,1); // conv g1 workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv4_1]->name = "c4g1"; workflow_layer[conv4_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights]; workflow_layer[conv4_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_1_biases]; workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv4_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_1]->output_format[0].format_3d ={{13,13,384/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_2],0}; di->workflow_item_create_function(&workflow_layer[conv4_2],1,&inputs_descriptor,1); // conv g2 workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv4_2]->name = "c4g2"; 
workflow_layer[conv4_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv4_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv4_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights]; workflow_layer[conv4_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_2_biases]; workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv4_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv4_2]->output_format[0].format_3d ={{13,13,384/2}}; } // ------------------------------------------------------------------------------------------ // STAGE 05 // convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2) // merge: (z-axis) // maxpool: 3x3 stride 2x2; // output: 13x13x256 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_1],0}; di->workflow_item_create_function(&workflow_layer[conv5_1],1,&inputs_descriptor,1); // conv g1 workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv5_1]->name = "c5g1"; workflow_layer[conv5_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_1_weights]; workflow_layer[conv5_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_1_biases]; workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv5_1]->arguments.forward_convolution.stride[0] = 1; 
workflow_layer[conv5_1]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_1]->output_format[0].format_3d ={{13,13,256/2}}; } { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_2],0}; di->workflow_item_create_function(&workflow_layer[conv5_2],1,&inputs_descriptor,1); // conv g2 workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION; workflow_layer[conv5_2]->name = "c5g2"; workflow_layer[conv5_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[conv5_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO; workflow_layer[conv5_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_2_weights]; workflow_layer[conv5_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_2_biases]; workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[1] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.stride[0] = 1; workflow_layer[conv5_2]->arguments.forward_convolution.stride[1] = 1; workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[conv5_2]->output_format[0].format_3d ={{13,13,256/2}}; } // merge g1 and g2 { nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}}; di->workflow_item_create_function(&workflow_layer[merge5],2,inputs_descriptor,1); workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE; workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[merge5]->output_format[0].format_3d ={{13,13,256}}; } // maxpool: 3x3 stride 2x2; { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge5],0}; 
di->workflow_item_create_function(&workflow_layer[pool5],1,&inputs_descriptor,1); // pooling workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_POOLING; workflow_layer[pool5]->name = "p5"; workflow_layer[pool5]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX; workflow_layer[pool5]->arguments.forward_pooling.size[0] = 3; workflow_layer[pool5]->arguments.forward_pooling.size[1] = 3; workflow_layer[pool5]->arguments.forward_pooling.stride[0] = 2; workflow_layer[pool5]->arguments.forward_pooling.stride[1] = 2; workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D; workflow_layer[pool5]->output_format[0].format_3d ={{6,6,256}}; } // ------------------------------------------------------------------------------------------ // STAGE 06 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool5],0}; di->workflow_item_create_function(&workflow_layer[fc6],1,&inputs_descriptor,1); workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc6]->name = "fc6"; workflow_layer[fc6]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU; workflow_layer[fc6]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc6_weights]; workflow_layer[fc6]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc6_biases]; workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc6]->output_format[0].format_1d ={{4096}}; } // ------------------------------------------------------------------------------------------ // STAGE 07 // full: ReLU // output: 4096 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc6],0}; di->workflow_item_create_function(&workflow_layer[fc7],1,&inputs_descriptor,1); workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc7]->name = "fc7"; workflow_layer[fc7]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU; 
workflow_layer[fc7]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc7_weights]; workflow_layer[fc7]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc7_biases]; workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc7]->output_format[0].format_1d ={{4096}}; } // ------------------------------------------------------------------------------------------ // STAGE 08 // full: ; // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc7],0}; di->workflow_item_create_function(&workflow_layer[fc8],1,&inputs_descriptor,1); workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED; workflow_layer[fc8]->name = "fc8"; workflow_layer[fc8]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE; workflow_layer[fc8]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc8_weights]; workflow_layer[fc8]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc8_biases]; workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[fc8]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------ // STAGE 09 (softmax) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc8],0}; di->workflow_item_create_function(&workflow_layer[softmax],1,&inputs_descriptor,1); workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX; workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[softmax]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------ // STAGE 10 (output) // output: 1000 { nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[softmax],0}; di->workflow_item_create_function(&workflow_layer[output],1,&inputs_descriptor,1); workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT; 
workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D; workflow_layer[output]->output_format[0].format_1d ={{1000}}; } // ------------------------------------------------------------------------------------------- // END of workflow stages definition // ------------------------------------------------------------------------------------------- workflow->input[0] = workflow_layer[input]; workflow->output[0] = workflow_layer[output]; // ------------------------------------------------------------------------------------------- return workflow; }