Code example #1
    virtual nn_workflow_t *init_test_workflow( nn_device_interface_0_t *_di ) {

        if(!is_valid()) throw std::runtime_error( error_ );

        for(auto &wi : workflow_layer) wi = nullptr;

        this->di = _di;

        di->workflow_create_function( &workflow, 1, 1 );

        // STAGE 0 (input)
        {
            di->workflow_item_create_function( &workflow_layer[input], 0, nullptr, 1 );

            workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
            workflow_layer[input]->arguments.input.index = 0;
            workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[input]->output_format[0].format_1d = { { relu_length } };
        }

        // STAGE 1 relu
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[input], 0 };
            di->workflow_item_create_function( &workflow_layer[relu], 1, &inputs_descriptor, 1 );

            workflow_layer[relu]->type = NN_WORK_ITEM_TYPE_RELU;

            workflow_layer[relu]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[relu]->output_format[0].format_1d = { { relu_length } };
        }
        // ------------------------------------------------------------------------------------------
        // STAGE 2 output
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[relu], 0 };
            di->workflow_item_create_function( &workflow_layer[output], 1, &inputs_descriptor, 1 );

            workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;

            workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[output]->output_format[0].format_1d = { { relu_length } };
        }
        // -------------------------------------------------------------------------------------------
        // END of workflow stages definition
        workflow->input[0] = workflow_layer[input];
        workflow->output[0] = workflow_layer[output];
        // -------------------------------------------------------------------------------------------

        return workflow;
    }
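
The three stages above simply wire INPUT -> RELU -> OUTPUT over a 1-D buffer of relu_length elements. As a reference for what the RELU work item is expected to compute, here is a minimal standalone sketch in plain C++ (it deliberately stays outside the nn_* API; relu_length is whatever length the test fixture uses):

    #include <algorithm>
    #include <vector>

    // Reference ReLU over a 1-D buffer: out[i] = max(0.0f, in[i]).
    std::vector<float> reference_relu(const std::vector<float> &in) {
        std::vector<float> out(in.size());
        std::transform(in.begin(), in.end(), out.begin(),
                       [](float v) { return std::max(0.0f, v); });
        return out;
    }

    // Usage idea: run the workflow on relu_length random floats and compare the
    // device output buffer against reference_relu(input).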
Code example #2
    virtual nn_workflow_t *init_workflow(nn_device_interface_0_t *di){

        if(!is_valid()) throw std::runtime_error(error_);

        this->di = di;

        std::cout
            << "--------------------------------------------------------"
            << std::endl
            << "Loading weights and biases"
            << std::endl << std::endl;

        // Load weights and biases
        auto load_biases_or_weights = [](std::string wb_file_name) {
            nn::data<float> *wb_pointer = nn_data_load_from_file_time_measure(wb_file_name);
            if(wb_pointer == nullptr) {
                std::cerr << "Can't load " << wb_file_name << std::endl;
                throw std::runtime_error("Can't load " + wb_file_name);
            }
            return wb_pointer;
        };

        try {
            nnwrkld_conv1_weights = load_biases_or_weights("weights_lenet/conv1.nn");
            nnwrkld_conv1_biases = load_biases_or_weights("weights_lenet/conv1_bias.nn");
            nnwrkld_conv2_weights = load_biases_or_weights("weights_lenet/conv2.nn");
            nnwrkld_conv2_biases = load_biases_or_weights("weights_lenet/conv2_bias.nn");
            nnwrkld_fc1_weights = load_biases_or_weights("weights_lenet/ip1.nn");
            nnwrkld_fc1_biases = load_biases_or_weights("weights_lenet/ip1_bias.nn");
            nnwrkld_fc2_weights = load_biases_or_weights("weights_lenet/ip2.nn");
            nnwrkld_fc2_biases = load_biases_or_weights("weights_lenet/ip2_bias.nn");
        }
        catch(...) {
            return workflow;
        }

        std::cout
            << "--------------------------------------------------------" << std::endl
            << "Build of workflow" << std::endl;

        di->workflow_create_function(&workflow, 1, 1);

        // ------------------------------------------------------------------------------------------
        // STAGE 0 (input)
        //         output: 28x28
        {
            di->workflow_item_create_function(&wrkflwi_input, 0, nullptr, 1);

            wrkflwi_input->type = NN_WORK_ITEM_TYPE_INPUT;
            wrkflwi_input->arguments.input.index = 0;
            wrkflwi_input->output_format[0].format = NN_DATA_FORMAT_2D;
            wrkflwi_input->output_format[0].format_2d ={ { img_size, img_size } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 01
        //           convo: 5x5 stride 1x1; no-activation; output: 24x24x20
        //         maxpool: 2x2 stride 2x2;
        //          output: 12x12x20
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_input, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_1_conv, 1, &inputs_descriptor, 1);

            wrkflwi_stage_1_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            wrkflwi_stage_1_conv->name = "c1";

            wrkflwi_stage_1_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
            wrkflwi_stage_1_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;

            // We have weights and biases for 20 filters, but we want four more filters, so let's add padding
            wrkflwi_stage_1_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv1_weights,1,24);
            wrkflwi_stage_1_conv->arguments.forward_convolution.biases = nn_data_extend_biases_by_padding(nnwrkld_conv1_biases,24);

            wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[0] = 0;
            wrkflwi_stage_1_conv->arguments.forward_convolution.center_offset[1] = 0;

            wrkflwi_stage_1_conv->arguments.forward_convolution.stride[0] = 1;
            wrkflwi_stage_1_conv->arguments.forward_convolution.stride[1] = 1;

            wrkflwi_stage_1_conv->output_format[0].format = NN_DATA_FORMAT_3D;
            // It should be 20 output feature maps, but only output counts divisible by 8 are supported
            wrkflwi_stage_1_conv->output_format[0].format_3d ={ { 24, 24, 24 } };
        }
        
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_conv, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_1_pool, 1, &inputs_descriptor, 1);

            wrkflwi_stage_1_pool->type = NN_WORK_ITEM_TYPE_POOLING;
            wrkflwi_stage_1_pool->name = "p1";

            wrkflwi_stage_1_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
            wrkflwi_stage_1_pool->arguments.forward_pooling.size[0] = 2;
            wrkflwi_stage_1_pool->arguments.forward_pooling.size[1] = 2;
            wrkflwi_stage_1_pool->arguments.forward_pooling.stride[0] = 2;
            wrkflwi_stage_1_pool->arguments.forward_pooling.stride[1] = 2;

            wrkflwi_stage_1_pool->output_format[0].format = NN_DATA_FORMAT_3D;
            wrkflwi_stage_1_pool->output_format[0].format_3d ={ { 12, 12, 24 } };
        }
        // view
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_pool, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_1_subv, 1, &inputs_descriptor, 1); // view 

            wrkflwi_stage_1_subv->type = NN_WORK_ITEM_TYPE_VIEW;
            wrkflwi_stage_1_subv->arguments.view.origin[0] = 0;
            wrkflwi_stage_1_subv->arguments.view.origin[1] = 0;
            wrkflwi_stage_1_subv->arguments.view.origin[2] = 0;

            wrkflwi_stage_1_subv->output_format[0].format = NN_DATA_FORMAT_3D;
            wrkflwi_stage_1_subv->output_format[0].format_3d ={ { 12, 12, 20 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 02
        //           convo: 5x5 stride 1x1; no-activation; output: 8x8x50
        //         maxpool: 2x2 stride 2x2;
        //          output: 4x4x50

        // convolution 2 
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_1_subv, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_2_conv, 1, &inputs_descriptor, 1);

            wrkflwi_stage_2_conv->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            wrkflwi_stage_2_conv->name = "c2";

            wrkflwi_stage_2_conv->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_NONE;
            wrkflwi_stage_2_conv->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            wrkflwi_stage_2_conv->arguments.forward_convolution.weights = nn_data_extend_weights_by_padding(nnwrkld_conv2_weights,20,56);
            wrkflwi_stage_2_conv->arguments.forward_convolution.biases = nn_data_extend_biases_by_padding(nnwrkld_conv2_biases,56);

            wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[0] = 0;
            wrkflwi_stage_2_conv->arguments.forward_convolution.center_offset[1] = 0;

            wrkflwi_stage_2_conv->arguments.forward_convolution.stride[0] = 1;
            wrkflwi_stage_2_conv->arguments.forward_convolution.stride[1] = 1;

            wrkflwi_stage_2_conv->output_format[0].format = NN_DATA_FORMAT_3D;
            // It should be 50 output feature maps, but only output counts divisible by 8 are supported
            wrkflwi_stage_2_conv->output_format[0].format_3d ={ { 8, 8, 56 } };
        }

        // maxpool: 2x2 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_conv, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_2_pool, 1, &inputs_descriptor, 1); // pooling

            wrkflwi_stage_2_pool->type = NN_WORK_ITEM_TYPE_POOLING;
            wrkflwi_stage_2_pool->name = "p2";

            wrkflwi_stage_2_pool->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

            wrkflwi_stage_2_pool->arguments.forward_pooling.size[0] = 2;
            wrkflwi_stage_2_pool->arguments.forward_pooling.size[1] = 2;

            wrkflwi_stage_2_pool->arguments.forward_pooling.stride[0] = 2;
            wrkflwi_stage_2_pool->arguments.forward_pooling.stride[1] = 2;

            wrkflwi_stage_2_pool->output_format[0].format = NN_DATA_FORMAT_3D;
            wrkflwi_stage_2_pool->output_format[0].format_3d ={ { 4, 4, 56 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 03
        //            full: ReLU
        //          output: 500
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_2_pool, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_3_fc, 1, &inputs_descriptor, 1);

            wrkflwi_stage_3_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            wrkflwi_stage_3_fc->name = "fc1";

            wrkflwi_stage_3_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;


            // Weights exported from Caffe come in 2D format, while here we need them in 4D format
            nn::data<float>* nnwrkld_fc1_converted_weights = nn_data_convert_weights_2D_to_4D(nnwrkld_fc1_weights, 
                                                                                              4, 
                                                                                              4,
                                                                                              50,
                                                                                              nnwrkld_fc1_weights->size[1]);
            // release original weights
            delete nnwrkld_fc1_weights;
            // Extend the FC weights' input depth to match the padded input feature maps
            nnwrkld_fc1_weights = nn_data_extend_weights_by_padding(nnwrkld_fc1_converted_weights,56,nnwrkld_fc1_converted_weights->size[3]);
            delete nnwrkld_fc1_converted_weights;
            nnwrkld_fc1_converted_weights = nullptr;

            wrkflwi_stage_3_fc->arguments.forward_fully_connected.weights = nnwrkld_fc1_weights;
            wrkflwi_stage_3_fc->arguments.forward_fully_connected.biases = nnwrkld_fc1_biases;

            wrkflwi_stage_3_fc->output_format[0].format = NN_DATA_FORMAT_1D;
            wrkflwi_stage_3_fc->output_format[0].format_1d ={ { 500 } };
        }
 
        // ------------------------------------------------------------------------------------------
        // STAGE 04
        //            full: ;
        //          output: 10
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_3_fc, 0 };
            di->workflow_item_create_function(&wrkflwi_stage_4_fc, 1, &inputs_descriptor, 1);

            wrkflwi_stage_4_fc->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            wrkflwi_stage_4_fc->name = "fc2";

            wrkflwi_stage_4_fc->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;

            wrkflwi_stage_4_fc->arguments.forward_fully_connected.weights = nnwrkld_fc2_weights;
            wrkflwi_stage_4_fc->arguments.forward_fully_connected.biases = nnwrkld_fc2_biases;

            wrkflwi_stage_4_fc->output_format[0].format = NN_DATA_FORMAT_1D;
            wrkflwi_stage_4_fc->output_format[0].format_1d ={ { 10 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 05 (softmax)
        //          output: 10
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_stage_4_fc, 0 };
            di->workflow_item_create_function(&wrkflwi_softmax, 1, &inputs_descriptor, 1);

            wrkflwi_softmax->type = NN_WORK_ITEM_TYPE_SOFTMAX;

            wrkflwi_softmax->output_format[0].format = NN_DATA_FORMAT_1D;
            wrkflwi_softmax->output_format[0].format_1d ={ { 10 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 6 (output)
        //          output: 10
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { wrkflwi_softmax, 0 };
            di->workflow_item_create_function(&wrkflwi_output, 1, &inputs_descriptor, 1);

            wrkflwi_output->type = NN_WORK_ITEM_TYPE_OUTPUT;

            wrkflwi_output->output_format[0].format = NN_DATA_FORMAT_1D;
            wrkflwi_output->output_format[0].format_1d ={ { 10 } };

        }

        // -------------------------------------------------------------------------------------------
        // END of workflow stages definition
        // -------------------------------------------------------------------------------------------
        workflow->input[0] = wrkflwi_input;
        workflow->output[0] = wrkflwi_output;
        // -------------------------------------------------------------------------------------------

        return workflow;
    }
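
The spatial sizes in the stage comments follow from the usual valid-convolution and pooling size arithmetic (center_offset = 0, so no implicit padding is assumed here). A small compile-time sanity check of the LeNet numbers used above, as a sketch:

    #include <cstddef>

    // Valid convolution and pooling share the same size formula:
    //   out = (in - kernel) / stride + 1
    constexpr std::size_t out_size(std::size_t in, std::size_t kernel, std::size_t stride) {
        return (in - kernel) / stride + 1;
    }

    static_assert(out_size(28, 5, 1) == 24, "conv1: 28x28 -> 24x24");
    static_assert(out_size(24, 2, 2) == 12, "pool1: 24x24 -> 12x12");
    static_assert(out_size(12, 5, 1) == 8,  "conv2: 12x12 -> 8x8");
    static_assert(out_size( 8, 2, 2) == 4,  "pool2: 8x8 -> 4x4");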
Code example #3
    virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) {

        if(!is_valid()) throw std::runtime_error(error_);

        this->di = _di;

            // load nn:data factors (weights and biases) for successive layers
            mean_factor = nn_data_load_from_file("weights_caffenet/imagenet_mean.nnd");
            workflow_layer_weights_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1.nnd");
            workflow_layer_biases_float[conv1_factor] = nn_data_load_from_file("weights_caffenet/conv1_bias.nnd");
            workflow_layer_weights_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_g1.nnd");
            workflow_layer_biases_float[conv2_1_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g1.nnd");
            workflow_layer_weights_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_g2.nnd");
            workflow_layer_biases_float[conv2_2_factor] = nn_data_load_from_file("weights_caffenet/conv2_bias_g2.nnd");
            workflow_layer_weights_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3.nnd");
            workflow_layer_biases_float[conv3_factor] = nn_data_load_from_file("weights_caffenet/conv3_bias.nnd");
            workflow_layer_weights_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_g1.nnd");
            workflow_layer_biases_float[conv4_1_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g1.nnd");
            workflow_layer_weights_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_g2.nnd");
            workflow_layer_biases_float[conv4_2_factor] = nn_data_load_from_file("weights_caffenet/conv4_bias_g2.nnd");
            workflow_layer_weights_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_g1.nnd");
            workflow_layer_biases_float[conv5_1_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g1.nnd");
            workflow_layer_weights_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_g2.nnd");
            workflow_layer_biases_float[conv5_2_factor] = nn_data_load_from_file("weights_caffenet/conv5_bias_g2.nnd");
            workflow_layer_weights_float[fc6_factor] = nn_data_load_from_file("weights_caffenet/fc6.nnd");
            workflow_layer_biases_float[fc6_factor] = nn_data_load_from_file("weights_caffenet/fc6_bias.nnd");
            workflow_layer_weights_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7.nnd");
            workflow_layer_biases_float[fc7_factor] = nn_data_load_from_file("weights_caffenet/fc7_bias.nnd");
            workflow_layer_weights_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8.nnd");
            workflow_layer_biases_float[fc8_factor] = nn_data_load_from_file("weights_caffenet/fc8_bias.nnd");

            for (auto wlwf : workflow_layer_weights_float)
                if (wlwf == nullptr)
                    throw std::runtime_error("error: one or more weight files were not loaded");
            for (auto wlbf : workflow_layer_biases_float)
                if (wlbf == nullptr)
                    throw std::runtime_error("error: one or more bias files were not loaded");

        di->workflow_create_function(&workflow,1,1);

        //                                                            { c1    c2_1  c2_2  c3    c4_1  c4_2  c5_1  c5_2  fc6   fc7   fc8   }
        const size_t nnwrkld_accumulator_fraction[last_factor+1]    = { 16,   19,   17,   22,   22,   22,   23,   22,   24,   26,   24    };
        const size_t nnwrkld_output_fraction[last_factor+1]         = { 3,    7,    7,    6,    7,    7,    8,    8,    10,   12,   26    };
        const size_t nnwrkld_weights_float_fraction[last_factor+1]  = { 16,   16,   14,   15,   16,   16,   16,   15,   16,   16,   12    };
        const size_t nnwrkld_biases_float_fraction[last_factor+1]   = {nnwrkld_accumulator_fraction[conv1_factor],
                                                                       nnwrkld_accumulator_fraction[conv2_1_factor],
                                                                       nnwrkld_accumulator_fraction[conv2_2_factor],
                                                                       nnwrkld_accumulator_fraction[conv3_factor],
                                                                       nnwrkld_accumulator_fraction[conv4_1_factor],
                                                                       nnwrkld_accumulator_fraction[conv4_2_factor],
                                                                       nnwrkld_accumulator_fraction[conv5_1_factor],
                                                                       nnwrkld_accumulator_fraction[conv5_2_factor],
                                                                       nnwrkld_accumulator_fraction[fc6_factor],
                                                                       nnwrkld_accumulator_fraction[fc7_factor],
                                                                       nnwrkld_accumulator_fraction[fc8_factor]
                                                                      };
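        // Convert the float factors to fixed point; the third argument (1 << fraction) is
        // presumably the scale applied before rounding to int16 weights / int32 biases.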
        for(auto i = 0; i<=last_factor;++i) {
            workflow_layer_weights_int16[i] = new nn::data<int16_t>(static_cast<const size_t*>(workflow_layer_weights_float[i]->size),workflow_layer_weights_float[i]->dimension);
            workflow_layer_biases_int32[i] = new nn::data<int32_t>(static_cast<const size_t*>(workflow_layer_biases_float[i]->size),workflow_layer_biases_float[i]->dimension);
            nn_data_convert_float_to_int16_fixedpoint(workflow_layer_weights_float[i],workflow_layer_weights_int16[i],1 << nnwrkld_weights_float_fraction[i]);
            nn_data_convert_float_to_int32_fixedpoint(workflow_layer_biases_float[i],workflow_layer_biases_int32[i],1 << nnwrkld_biases_float_fraction[i]);
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 0 (input)
        //         output: 227x227x3
        {
            di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1);

            workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
            workflow_layer[input]->arguments.input.index = 0;
            workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 0 (imagenet_mean_subtract)
        //         output: 227x227x3
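        //         (the ARITHMETIC item below subtracts the mean image element-wise: out = in - mean_factor)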
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0};
            di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1);

            workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC;
            workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = mean_factor;
            workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION;

            workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 0 Convert float to int16
        //
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[mean_substract], 0 };
            di->workflow_item_create_function(&workflow_layer[convert], 1, &inputs_descriptor, 1);

            workflow_layer[convert]->type = NN_WORK_ITEM_TYPE_CONVERT_FLOAT_TO_INT16_FIXEDPOINT;
            workflow_layer[convert]->arguments.forward_convert_float_to_int16_fixedpoint.output_fraction = 0;

            workflow_layer[convert]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[convert]->output_format[0].format_3d = nn_output_format_3d{ { img_size, img_size, 4 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 01
        //           convo: 11x11 stride 4x4; ReLU; output: 55x55x96
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 27x27x96
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[convert], 0 };
            di->workflow_item_create_function(&workflow_layer[conv1], 1, &inputs_descriptor, 1);

            workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv1]->name = "c1";

            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;
            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv1_factor];
            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv1_factor];

            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 0;
            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 0;

            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 4;
            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 4;

            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv1_factor];
            workflow_layer[conv1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv1_factor];

            workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv1]->output_format[0].format_3d = { { 55, 55, 96 } };
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv1], 0 };
            di->workflow_item_create_function(&workflow_layer[pool1], 1, &inputs_descriptor, 1);

            workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT;
            workflow_layer[pool1]->name = "p1";

            workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3;
            workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3;
            workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2;
            workflow_layer[pool1]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2;

            workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool1]->output_format[0].format_3d = { { 27, 27, 96 } };
        }
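
        // norm1 below is presumably the standard AlexNet-style local response normalization across maps:
        //   out = in / (k + alpha * sum of in^2 over n neighbouring maps)^beta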

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool1], 0 };
            di->workflow_item_create_function(&workflow_layer[norm1], 1, &inputs_descriptor, 1);

            workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN;
            workflow_layer[norm1]->name = "lrn1";

            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.k = 1;
            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.n = 5;
            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f;
            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f;
            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv1_factor];
            workflow_layer[norm1]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv1_factor];

            workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm1]->output_format[0].format_3d = { { 27, 27, 96 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 02
        //           split: 2 (z-axis 96/2); output 27x27x(2*96/2)
        //           convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 };
            di->workflow_item_create_function(&workflow_layer[subv1_1], 1, &inputs_descriptor, 1); // view g1

            workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_1]->output_format[0].format_3d = { { 27, 27, 96 / 2 } };

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm1], 0 };
            di->workflow_item_create_function(&workflow_layer[subv1_2], 1, &inputs_descriptor, 1);   // view g2

            workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[2] = (96 / 2);

            workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_2]->output_format[0].format_3d = { { 27, 27, 96 / 2 } };
        }

        // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded; output: 27x27x(256/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_1], 0 };
            di->workflow_item_create_function(&workflow_layer[conv2_1], 1, &inputs_descriptor, 1);

            workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv2_1]->name = "c2g1";

            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_1_factor];
            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_1_factor];

            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2;
            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2;

            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_1_factor];
            workflow_layer[conv2_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_1_factor];

            workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_1]->output_format[0].format_3d = { { 27, 27, 256 / 2 } };
        }

        // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded; output: 27x27x(256/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv1_2], 0 };
            di->workflow_item_create_function(&workflow_layer[conv2_2], 1, &inputs_descriptor, 1);

            workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv2_2]->name = "c2g2";

            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv2_2_factor];
            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv2_2_factor];

            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 2;
            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 2;

            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv2_2_factor];
            workflow_layer[conv2_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv2_2_factor];

            workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_2]->output_format[0].format_3d = { { 27, 27, 256 / 2 } };
        }

        // merge g1 and g2
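        // (MERGE with axis = 2 concatenates the two group outputs along depth: 128 + 128 = 256 feature maps)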
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] = { { workflow_layer[conv2_1], 0 }, { workflow_layer[conv2_2], 0 } };
            di->workflow_item_create_function(&workflow_layer[merge2], 2, inputs_descriptor, 1);

            workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge2]->output_format[0].format_3d = { { 27, 27, 256 } };
        }

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge2], 0 };
            di->workflow_item_create_function(&workflow_layer[pool2], 1, &inputs_descriptor, 1); // pooling

            workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT;
            workflow_layer[pool2]->name = "p2";

            workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3;
            workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3;

            workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2;
            workflow_layer[pool2]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2;

            workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool2]->output_format[0].format_3d = { { 13, 13, 256 } };
        }

        //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool2], 0 };
            di->workflow_item_create_function(&workflow_layer[norm2], 1, &inputs_descriptor, 1);

            workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION_RESPONSE_ACROSS_MAPS_FORWARD_I16QN;
            workflow_layer[norm2]->name = "lrn2";

            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.k = 1;
            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.n = 5;
            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.alpha = 0.00002f;
            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.beta = 0.75f;
            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.input = nnwrkld_output_fraction[conv2_2_factor];
            workflow_layer[norm2]->arguments.normalization_response_across_maps_forward_i16qn.fractions.output = nnwrkld_output_fraction[conv2_2_factor];

            workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm2]->output_format[0].format_3d = { { 13, 13, 256 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 03
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x384
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[norm2], 0 };
            di->workflow_item_create_function(&workflow_layer[conv3], 1, &inputs_descriptor, 1);

            workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv3]->name = "c3";

            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv3_factor];
            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv3_factor];

            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1;

            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv3_factor];
            workflow_layer[conv3]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv3_factor];

            workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv3]->output_format[0].format_3d = { { 13, 13, 384 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 04
        //           split: 2 (z-axis 384/2)
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x(2*384/2) (continue split to next stage)
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 };
            di->workflow_item_create_function(&workflow_layer[subv3_1], 1, &inputs_descriptor, 1); // view g1

            workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } };
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv3], 0 };
            di->workflow_item_create_function(&workflow_layer[subv3_2], 1, &inputs_descriptor, 1); // view g2

            workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[2] = 384 / 2;

            workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } };

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_1], 0 };
            di->workflow_item_create_function(&workflow_layer[conv4_1], 1, &inputs_descriptor, 1); // conv g1

            workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv4_1]->name = "c4g1";

            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_1_factor];
            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_1_factor];

            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1;

            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_1_factor];
            workflow_layer[conv4_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_1_factor];

            workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_1]->output_format[0].format_3d = { { 13, 13, 384 / 2 } };
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[subv3_2], 0 };
            di->workflow_item_create_function(&workflow_layer[conv4_2], 1, &inputs_descriptor, 1); // conv g2

            workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv4_2]->name = "c4g2";

            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv4_2_factor];
            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv4_2_factor];

            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1;

            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv4_2_factor];
            workflow_layer[conv4_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv4_2_factor];

            workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_2]->output_format[0].format_3d = { { 13, 13, 384 / 2 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 05
        //           convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //          output: 6x6x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_1], 0 };
            di->workflow_item_create_function(&workflow_layer[conv5_1], 1, &inputs_descriptor, 1); // conv g1

            workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv5_1]->name = "c5g1";

            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_1_factor];
            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_1_factor];

            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1;

            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_1_factor];
            workflow_layer[conv5_1]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_1_factor];

            workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_1]->output_format[0].format_3d = { { 13, 13, 256 / 2 } };
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[conv4_2], 0 };
            di->workflow_item_create_function(&workflow_layer[conv5_2], 1, &inputs_descriptor, 1); // conv g2

            workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION_INT16_FIXEDPOINT;
            workflow_layer[conv5_2]->name = "c5g2";

            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.weights = workflow_layer_weights_int16[conv5_2_factor];
            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.biases = workflow_layer_biases_int32[conv5_2_factor];

            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.center_offset[1] = 1;

            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.stride[1] = 1;

            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.accumulator = nnwrkld_accumulator_fraction[conv5_2_factor];
            workflow_layer[conv5_2]->arguments.forward_convolution_int16_fixedpoint.activation.fractions.output = nnwrkld_output_fraction[conv5_2_factor];

            workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_2]->output_format[0].format_3d = { { 13, 13, 256 / 2 } };
        }

        // merge g1 and g2
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] = {{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}};
            di->workflow_item_create_function(&workflow_layer[merge5], 2, inputs_descriptor, 1);

            workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge5]->output_format[0].format_3d = { { 13, 13, 256 } };
        }

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[merge5], 0 };
            di->workflow_item_create_function(&workflow_layer[pool5], 1, &inputs_descriptor, 1); // pooling

            workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_MAX_POOLING_INT16_FIXEDPOINT;
            workflow_layer[pool5]->name = "p5";

            workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[0] = 3;
            workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_size[1] = 3;

            workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[0] = 2;
            workflow_layer[pool5]->arguments.forward_pooling_fixedpoint.pool_stride[1] = 2;

            workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = 16;
            workflow_layer[pool5]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = 8;

            workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool5]->output_format[0].format_3d = { { 6, 6, 256 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 06
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[pool5], 0 };
            di->workflow_item_create_function(&workflow_layer[fc6], 1, &inputs_descriptor, 1);

            workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN;
            workflow_layer[fc6]->name = "fc6";

            workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc6_factor];
            workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc6_factor];

            workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc6_factor];
            workflow_layer[fc6]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc6_factor];

            workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc6]->output_format[0].format_1d = { { 4096 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 07
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc6], 0 };
            di->workflow_item_create_function(&workflow_layer[fc7], 1, &inputs_descriptor, 1);

            workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I16QN;
            workflow_layer[fc7]->name = "fc7";

            workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.weights = workflow_layer_weights_int16[fc7_factor];
            workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i16qn.biases = workflow_layer_biases_int32[fc7_factor];

            workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc7_factor];
            workflow_layer[fc7]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc7_factor];

            workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc7]->output_format[0].format_1d = { { 4096 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 08
        //            full: ;
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc7], 0 };
            di->workflow_item_create_function(&workflow_layer[fc8], 1, &inputs_descriptor, 1);

            workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED_FORWARD_I16QN_I32QN;
            workflow_layer[fc8]->name = "fc8";

            workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.basic_arguments.function = NN_ACTIVATION_FUNCTION_NONE;

            workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.weights = workflow_layer_weights_int16[fc8_factor];
            workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.biases = workflow_layer_biases_int32[fc8_factor];

            workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.accumulator = nnwrkld_accumulator_fraction[fc8_factor];
            workflow_layer[fc8]->arguments.fully_connected_forward_i16qn_i32qn.activation.fractions.output = nnwrkld_output_fraction[fc8_factor];

            workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc8]->output_format[0].format_1d = { { 1000 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 09 (softmax)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[fc8], 0 };
            di->workflow_item_create_function(&workflow_layer[softmax], 1, &inputs_descriptor, 1);

            workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX_FIXEDPOINT;

            workflow_layer[softmax]->arguments.forward_softmax_fixedpoint.input_fraction = nnwrkld_output_fraction[fc8_factor];

            workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[softmax]->output_format[0].format_1d = { { 1000 } };
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 10 (output)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor = { workflow_layer[softmax], 0 };
            di->workflow_item_create_function(&workflow_layer[output], 1, &inputs_descriptor, 1);

            workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;

            workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[output]->output_format[0].format_1d = { { 1000 } };

        }

        // -------------------------------------------------------------------------------------------
        // END of workflow stages definition
        // -------------------------------------------------------------------------------------------
        workflow->input[0] = workflow_layer[input];
        workflow->output[0] = workflow_layer[output];
        // -------------------------------------------------------------------------------------------

        return workflow;

    }
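
The workflow above runs the whole CaffeNet topology in int16 fixed point: the conversion loop near the top scales every float weight and bias by 1 << fraction before storing it as int16 (weights) or int32 (biases), and each layer is told which fraction its accumulator and output use. A minimal Q-format sketch of that scaling in plain C++ (an illustration only, not the nn_data_convert_* helpers themselves; the saturation behaviour is an assumption):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Float -> fixed point: scale by 2^fraction_bits, round, saturate to the int16 range.
    int16_t float_to_q(float value, unsigned fraction_bits) {
        float scaled = std::round(value * static_cast<float>(1u << fraction_bits));
        scaled = std::min(32767.0f, std::max(-32768.0f, scaled));
        return static_cast<int16_t>(scaled);
    }

    // Fixed point -> float: divide by the same scale factor.
    float q_to_float(int16_t value, unsigned fraction_bits) {
        return static_cast<float>(value) / static_cast<float>(1u << fraction_bits);
    }

    // e.g. with fraction_bits = 16 (used for most weight tensors above):
    //   float_to_q(0.403f, 16) == 26411, and q_to_float(26411, 16) is roughly 0.403 again.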
Code example #4
    virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) {

        if(!is_valid()) throw std::runtime_error(error_);

        for(auto &wi : workflow_layer) wi = nullptr;
        for(auto &wb : workflow_layer_factor) wb = nullptr;

        this->di = _di;

        // create and populate nn:data factors (weights and biases) for successive layers
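        // (nn_data_populate presumably fills each buffer with values spanning the given range --
        //  synthetic stand-ins for trained weights, which is all this test workflow needs)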

        workflow_layer_factor[mean_factor] = new nn::data<float>(img_size,img_size,3);
        nn_data_populate(workflow_layer_factor[mean_factor],104.007f,122.679f);

        workflow_layer_factor[conv1_weights] = new nn::data<float>(11,11,3,96);
        nn_data_populate(workflow_layer_factor[conv1_weights],-0.374f,0.403f);

        workflow_layer_factor[conv1_biases] = new nn::data<float>(96);
        nn_data_populate(workflow_layer_factor[conv1_biases],-0.854f,0.232f);

        workflow_layer_factor[conv2_1_weights] = new nn::data<float>(5,5,48,128);
        nn_data_populate(workflow_layer_factor[conv2_1_weights],-0.285f,0.379f);

        workflow_layer_factor[conv2_1_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv2_1_biases],0.974f,1.034f);

        workflow_layer_factor[conv2_2_weights] = new nn::data<float>(5,5,48,128);
        nn_data_populate(workflow_layer_factor[conv2_2_weights],-0.269f,0.416f);

        workflow_layer_factor[conv2_2_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv2_2_biases],0.958f,1.027f);

        workflow_layer_factor[conv3_weights] = new nn::data<float>(3,3,256,384);
        nn_data_populate(workflow_layer_factor[conv3_weights],-0.185f,0.512f);

        workflow_layer_factor[conv3_biases] = new nn::data<float>(384);
        nn_data_populate(workflow_layer_factor[conv3_biases],-0.104f,0.093f);

        workflow_layer_factor[conv4_1_weights] = new nn::data<float>(3,3,192,192);
        nn_data_populate(workflow_layer_factor[conv4_1_weights],-0.103f,0.322f);

        workflow_layer_factor[conv4_1_biases] = new nn::data<float>(192);
        nn_data_populate(workflow_layer_factor[conv4_1_biases],0.844f,1.142f);

        workflow_layer_factor[conv4_2_weights] = new nn::data<float>(3,3,192,192);
        nn_data_populate(workflow_layer_factor[conv4_2_weights],-0.142f,0.353f);

        workflow_layer_factor[conv4_2_biases] = new nn::data<float>(192);
        nn_data_populate(workflow_layer_factor[conv4_2_biases],0.77f,1.219f);

        workflow_layer_factor[conv5_1_weights] = new nn::data<float>(3,3,192,128);
        nn_data_populate(workflow_layer_factor[conv5_1_weights],-0.092f,0.254f);

        workflow_layer_factor[conv5_1_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv5_1_biases],0.723f,1.50f);

        workflow_layer_factor[conv5_2_weights] = new nn::data<float>(3,3,192,128);
        nn_data_populate(workflow_layer_factor[conv5_2_weights],-0.133f,0.315f);

        workflow_layer_factor[conv5_2_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv5_2_biases],0.623f,1.742f);

        workflow_layer_factor[fc6_weights] = new nn::data<float>(6,6,256,4096);
        nn_data_populate(workflow_layer_factor[fc6_weights],-0.035f,0.048f);

        workflow_layer_factor[fc6_biases] = new nn::data<float>(4096);
        nn_data_populate(workflow_layer_factor[fc6_biases],0.92f,1.057f);

        workflow_layer_factor[fc7_weights] = new nn::data<float>(4096,4096);
        nn_data_populate(workflow_layer_factor[fc7_weights],-0.032f,0.052f);

        workflow_layer_factor[fc7_biases] = new nn::data<float>(4096);
        nn_data_populate(workflow_layer_factor[fc7_biases],0.741f,1.26f);

        workflow_layer_factor[fc8_weights] = new nn::data<float>(4096,1000);
        nn_data_populate(workflow_layer_factor[fc8_weights],-0.045f,0.067f);

        workflow_layer_factor[fc8_biases] = new nn::data<float>(1000);
        nn_data_populate(workflow_layer_factor[fc8_biases],-0.351f,0.425f);

        di->workflow_create_function(&workflow,1,1);
        // ------------------------------------------------------------------------------------------
        // STAGE 0 (input)
        //         output: 227x227x3
        {
            di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1);

            workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
            workflow_layer[input]->arguments.input.index = 0;
            workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 0 (imagenet_mean_subtract)
        //         output: 227x227x3
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0};
            di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1);

            workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC;
            workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = workflow_layer_factor[mean_factor];
            workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION;

            workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}};
        }
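        // the ARITHMETIC/SUBTRACTION item above subtracts the 227x227x3 mean factor
        // (values roughly in the ImageNet channel-mean range, 104-123) from the input image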

        // ------------------------------------------------------------------------------------------
        // STAGE 01
        //           convo: 11x11 stride 4x4; ReLU; output: 55x55x96
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 27x27x96
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[mean_substract],0};
            di->workflow_item_create_function(&workflow_layer[conv1],1,&inputs_descriptor,1);

            workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv1]->name = "c1";

            workflow_layer[conv1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
            workflow_layer[conv1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[conv1]->arguments.forward_convolution.weights = workflow_layer_factor[conv1_weights];
            workflow_layer[conv1]->arguments.forward_convolution.biases = workflow_layer_factor[conv1_biases];

            workflow_layer[conv1]->arguments.forward_convolution.center_offset[0] = 0;
            workflow_layer[conv1]->arguments.forward_convolution.center_offset[1] = 0;

            workflow_layer[conv1]->arguments.forward_convolution.stride[0] = 4;
            workflow_layer[conv1]->arguments.forward_convolution.stride[1] = 4;

            workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv1]->output_format[0].format_3d ={{55,55,96}};
        }
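        // conv1 spatial size: (227 - 11) / 4 + 1 = 55, hence the 55x55x96 output above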

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv1],0};
            di->workflow_item_create_function(&workflow_layer[pool1],1,&inputs_descriptor,1);

            workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool1]->name = "p1";

            workflow_layer[pool1]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
            workflow_layer[pool1]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool1]->arguments.forward_pooling.size[1] = 3;
            workflow_layer[pool1]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool1]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool1]->output_format[0].format_3d ={{27,27,96}};
        }
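        // pool1 spatial size: (55 - 3) / 2 + 1 = 27, hence the 27x27x96 output above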

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool1],0};
            di->workflow_item_create_function(&workflow_layer[norm1],1,&inputs_descriptor,1);

            workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
            workflow_layer[norm1]->name = "lrn1";

            workflow_layer[norm1]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
            workflow_layer[norm1]->arguments.forward_normalization.normalization.k = 1;             // Krizhevsky's paper uses k = 2
            workflow_layer[norm1]->arguments.forward_normalization.normalization.n = 5;
            workflow_layer[norm1]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // Krizhevsky's paper uses 1e-4, but does not
                                                                                                    // state that the sum of squares is divided
                                                                                                    // by the number of elements (n)
            workflow_layer[norm1]->arguments.forward_normalization.normalization.beta = 0.75f;

            workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm1]->output_format[0].format_3d ={{27,27,96}};
        }
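        // cross-map LRN, roughly: out = in / (k + alpha * sum of squares over n neighbouring maps)^beta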

        // ------------------------------------------------------------------------------------------
        // STAGE 02
        //           split: 2 (z-axis 96/2); output 27x27x(2*96/2)
        //           convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
            di->workflow_item_create_function(&workflow_layer[subv1_1],1,&inputs_descriptor,1); // view g1

            workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_1]->output_format[0].format_3d ={{27,27,96/2}};

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
            di->workflow_item_create_function(&workflow_layer[subv1_2],1,&inputs_descriptor,1);   // view g2

            workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[2] = (96/2);

            workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_2]->output_format[0].format_3d ={{27,27,96/2}};
        }
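        // subv1_1/subv1_2 split the 96 feature maps into two groups of 48 along the z-axis,
        // reproducing AlexNet's two-group convolution for conv2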

        // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded; output: 27x27x(256/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_1],0};
            di->workflow_item_create_function(&workflow_layer[conv2_1],1,&inputs_descriptor,1);

            workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv2_1]->name = "c2g1";

            workflow_layer[conv2_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_1_weights];
            workflow_layer[conv2_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_1_biases];

            workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[0] = 2;
            workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[1] = 2;

            workflow_layer[conv2_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv2_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_1]->output_format[0].format_3d ={{27,27,256/2}};
        }

        // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded; output: 27x27x(256/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_2],0};
            di->workflow_item_create_function(&workflow_layer[conv2_2],1,&inputs_descriptor,1);

            workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv2_2]->name = "c2g2";

            workflow_layer[conv2_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_2_weights];
            workflow_layer[conv2_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_2_biases];

            workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[0] = 2;
            workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[1] = 2;

            workflow_layer[conv2_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv2_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_2]->output_format[0].format_3d ={{27,27,256/2}};
        }

        // merge g1 and g2
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv2_1],0},{workflow_layer[conv2_2],0}};
            di->workflow_item_create_function(&workflow_layer[merge2],2,inputs_descriptor,1);

            workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge2]->output_format[0].format_3d ={{27,27,256}};

        }
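        // the z-axis merge concatenates the two group outputs: 128 + 128 = 256 feature maps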

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge2],0};
            di->workflow_item_create_function(&workflow_layer[pool2],1,&inputs_descriptor,1); // pooling

            workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool2]->name = "p2";

            workflow_layer[pool2]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

            workflow_layer[pool2]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool2]->arguments.forward_pooling.size[1] = 3;

            workflow_layer[pool2]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool2]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool2]->output_format[0].format_3d ={{13,13,256}};
        }

        //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool2],0};
            di->workflow_item_create_function(&workflow_layer[norm2],1,&inputs_descriptor,1);

            workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
            workflow_layer[norm2]->name = "lrn2";

            workflow_layer[norm2]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
            workflow_layer[norm2]->arguments.forward_normalization.normalization.k = 1;              // |
            workflow_layer[norm2]->arguments.forward_normalization.normalization.n = 5;              // |
            workflow_layer[norm2]->arguments.forward_normalization.normalization.alpha = 0.0001f/5;  // > see the comment at norm1 ("lrn1") above
            workflow_layer[norm2]->arguments.forward_normalization.normalization.beta = 0.75f;       // |

            workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm2]->output_format[0].format_3d ={{13,13,256}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 03
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x384
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm2],0};
            di->workflow_item_create_function(&workflow_layer[conv3],1,&inputs_descriptor,1);

            workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv3]->name = "c3";
            workflow_layer[conv3]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv3]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv3]->arguments.forward_convolution.weights = workflow_layer_factor[conv3_weights];
            workflow_layer[conv3]->arguments.forward_convolution.biases = workflow_layer_factor[conv3_biases];

            workflow_layer[conv3]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv3]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv3]->output_format[0].format_3d ={{13,13,384}};
        }
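        // conv3 takes the merged 256-map input without grouping and produces 384 maps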

        // ------------------------------------------------------------------------------------------
        // STAGE 04
        //           split: 2 (z-axis 384/2)
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x(2*384/2) (continue split to next stage)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
            di->workflow_item_create_function(&workflow_layer[subv3_1],1,&inputs_descriptor,1); // view g1

            workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_1]->output_format[0].format_3d ={{13,13,384/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
            di->workflow_item_create_function(&workflow_layer[subv3_2],1,&inputs_descriptor,1); // view g2

            workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[2] = 384/2;

            workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_2]->output_format[0].format_3d ={{13,13,384/2}};

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_1],0};
            di->workflow_item_create_function(&workflow_layer[conv4_1],1,&inputs_descriptor,1); // conv g1

            workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv4_1]->name = "c4g1";

            workflow_layer[conv4_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights];
            workflow_layer[conv4_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_1_biases];

            workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv4_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_1]->output_format[0].format_3d ={{13,13,384/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_2],0};
            di->workflow_item_create_function(&workflow_layer[conv4_2],1,&inputs_descriptor,1); // conv g2

            workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv4_2]->name = "c4g2";

            workflow_layer[conv4_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_2_weights];
            workflow_layer[conv4_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_2_biases];

            workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv4_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_2]->output_format[0].format_3d ={{13,13,384/2}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 05
        //           convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //          output: 6x6x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_1],0};
            di->workflow_item_create_function(&workflow_layer[conv5_1],1,&inputs_descriptor,1); // conv g1

            workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv5_1]->name = "c5g1";

            workflow_layer[conv5_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_1_weights];
            workflow_layer[conv5_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_1_biases];

            workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv5_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_1]->output_format[0].format_3d ={{13,13,256/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_2],0};
            di->workflow_item_create_function(&workflow_layer[conv5_2],1,&inputs_descriptor,1); // conv g2

            workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv5_2]->name = "c5g2";

            workflow_layer[conv5_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_2_weights];
            workflow_layer[conv5_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_2_biases];

            workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv5_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_2]->output_format[0].format_3d ={{13,13,256/2}};
        }

        // merge g1 and g2
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}};
            di->workflow_item_create_function(&workflow_layer[merge5],2,inputs_descriptor,1);

            workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge5]->output_format[0].format_3d ={{13,13,256}};
        }

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge5],0};
            di->workflow_item_create_function(&workflow_layer[pool5],1,&inputs_descriptor,1); // pooling

            workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool5]->name = "p5";

            workflow_layer[pool5]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

            workflow_layer[pool5]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool5]->arguments.forward_pooling.size[1] = 3;

            workflow_layer[pool5]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool5]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool5]->output_format[0].format_3d ={{6,6,256}};
        }
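        // pool5 spatial size: (13 - 3) / 2 + 1 = 6; the 6x6x256 result feeds fc6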

        // ------------------------------------------------------------------------------------------
        // STAGE 06
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool5],0};
            di->workflow_item_create_function(&workflow_layer[fc6],1,&inputs_descriptor,1);

            workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc6]->name = "fc6";

            workflow_layer[fc6]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc6]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc6_weights];
            workflow_layer[fc6]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc6_biases];

            workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc6]->output_format[0].format_1d ={{4096}};
        }
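        // fc6 weights are 6x6x256x4096, so the pooled 6x6x256 feature map is flattened
        // (6*6*256 = 9216 inputs) into a 4096-element vector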

        // ------------------------------------------------------------------------------------------
        // STAGE 07
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc6],0};
            di->workflow_item_create_function(&workflow_layer[fc7],1,&inputs_descriptor,1);

            workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc7]->name = "fc7";
            workflow_layer[fc7]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc7]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc7_weights];
            workflow_layer[fc7]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc7_biases];

            workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc7]->output_format[0].format_1d ={{4096}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 08
        //            full: (no activation)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc7],0};
            di->workflow_item_create_function(&workflow_layer[fc8],1,&inputs_descriptor,1);

            workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc8]->name = "fc8";

            workflow_layer[fc8]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;

            workflow_layer[fc8]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc8_weights];
            workflow_layer[fc8]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc8_biases];

            workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc8]->output_format[0].format_1d ={{1000}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 09 (softmax)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc8],0};
            di->workflow_item_create_function(&workflow_layer[softmax],1,&inputs_descriptor,1);

            workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX;

            workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[softmax]->output_format[0].format_1d ={{1000}};
        }
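        // a float softmax here (unlike the fixed-point variant in the previous example)
        // normalizes the 1000 fc8 scores into a probability distribution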

        // ------------------------------------------------------------------------------------------
        // STAGE 10 (output)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[softmax],0};
            di->workflow_item_create_function(&workflow_layer[output],1,&inputs_descriptor,1);

            workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;

            workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[output]->output_format[0].format_1d ={{1000}};

        }

        // -------------------------------------------------------------------------------------------
        // END of workflow stages definition
        // -------------------------------------------------------------------------------------------
        workflow->input[0] = workflow_layer[input];
        workflow->output[0] = workflow_layer[output];
        // -------------------------------------------------------------------------------------------

        return workflow;
    }
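
For reference, the spatial sizes quoted in the stage comments above follow the usual unpadded convolution/pooling output-size arithmetic. Below is a minimal standalone sketch (not part of the original sample; the helper name is invented) that reproduces those numbers:

    #include <cstddef>

    // Output size of an unpadded convolution or pooling window.
    constexpr std::size_t out_size(std::size_t in, std::size_t window, std::size_t stride) {
        return (in - window) / stride + 1;
    }

    static_assert(out_size(227, 11, 4) == 55, "conv1: 227x227 -> 55x55");
    static_assert(out_size( 55,  3, 2) == 27, "pool1: 55x55 -> 27x27");
    static_assert(out_size( 27,  3, 2) == 13, "pool2: 27x27 -> 13x13");
    static_assert(out_size( 13,  3, 2) ==  6, "pool5: 13x13 -> 6x6");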