Code example #1
bool test_convolution_float_cpu_random::run() {
    bool run_ok = true;
    test_measurement_result run_result;
    run_result.description = "RUN SUMMARY: " + test_description;

    std::cout << "-> Testing: " << test_description << std::endl;

    try {
        if(!init()) throw std::runtime_error( "init() returns false so can't run test" );

        NN_WORKLOAD_DATA_TYPE input_format  = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH;
        NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH;

        for(uint32_t batch : { 1, 8, 48 }) {
            bool local_ok = true;
            test_measurement_result local_result;
            local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description;
            C_time_control  local_timer;

            // begin local test
            uint32_t z = 2,
                img_size = 227,
                num_features_map = 8;

            nn::data<float, 4> *images = new nn::data<float, 4>( img_size, img_size, z, batch );
            if(images == nullptr) throw std::runtime_error("Cant't create images nn::data");

            nn_data_populate( nn::data_cast<float, 0>(images),
                0.0f,
                255.0f );

            nn::data<float, 4> *images_with_padding = new nn::data<float, 4>( img_size + 2, img_size + 2, z, batch );
            if(images_with_padding == nullptr) {
                delete images;
                throw std::runtime_error("Cant't create images_with_padding nn::data");
            }
            { // padding for input for naive method
                nn_data_populate( nn::data_cast<float, 0>(images_with_padding),
                    0.0f );
                for(uint32_t tmp_batch = 0; tmp_batch < batch; ++tmp_batch)
                    for(uint32_t tmp_z = 0; tmp_z < z; ++tmp_z)
                        for(uint32_t y = 0; y < img_size; ++y)
                            for(uint32_t x = 0; x < img_size; ++x)
                                images_with_padding->at( x, y, tmp_z, tmp_batch ) = images->at( x, y, tmp_z, tmp_batch );

            }

            nn_workload_t *workload = nullptr;
            nn_data_t *input_array[1] = { images };
            auto workload_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch );
            if(workload_output==nullptr) {
                delete images;
                delete images_with_padding;
                throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch));
            }

            nn::data<float> *output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) };

            auto naive_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch );
            if(naive_output==nullptr) {
                delete images;
                delete images_with_padding;
                delete workload_output;
                throw std::runtime_error("unable to create naive_output for batch = " +std::to_string(batch));
            }

            auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch );
            if(!workload) throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch )
                + " status: " + std::to_string( status ) );

            test_measurement_result run_result;
            run_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description;

            // changing order needed
            //di->workload_execute_function( workload, reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status );

            float* biases = nullptr;
            float* weights = nullptr;

            { // read biases and weights
                if(NN_WORK_ITEM_TYPE_CONVOLUTION == workflow->input[0]->use[0].item->type) {
                    auto tmp = reinterpret_cast<nn_arguments_forward_convolution_t*>(&workflow->input[0]->use[0].item->arguments);
                    biases = reinterpret_cast<float*>(tmp->biases->buffer);
                    weights = reinterpret_cast<float*>(tmp->weights->buffer);
                }
            }

            if(nullptr == biases || nullptr == weights)
                throw std::runtime_error( "reading weight or biases for naive version failed for batch = " + std::to_string( batch ) );

            naive_convolv_float_implementation(
                reinterpret_cast<float*>(images_with_padding->buffer),
                reinterpret_cast<float*>(naive_output->buffer),
                biases,
                weights,
                batch,
                num_features_map,
                z,
                img_size,
                img_size,
                img_size + 2,
                img_size + 2,
                3,
                3,
                1,
                1,
                NN_ACTIVATION_FUNCTION_RELU );

            //local_ok = compare_4d_data( workload_output, naive_output );
            local_ok = true; // BLIND TEST

            // end of local test
            // summary:
            local_timer.tock();
            local_result.time_consumed   = local_timer.get_time_diff();
            local_result.clocks_consumed = local_timer.get_clocks_diff();
            local_result.passed = local_ok;
            tests_results << local_result;

            run_ok = run_ok && local_ok;

            if(workload_output)      delete workload_output;
            if(naive_output)         delete naive_output;
            if(images)               delete images;
            if(images_with_padding)  delete images_with_padding;
        }
    } catch(std::runtime_error &error) {
        tests_results << run_result;
        std::cout << "error: " << error.what() << std::endl;
    } catch(std::exception &error) {
        tests_results << run_result;
        std::cout << "error: " << error.what() << std::endl;
    } catch(...) {
        tests_results << run_result;
        std::cout << "error: unknown" << std::endl;
    }
    if(!done()) run_ok = false;
    std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;;
    return run_ok;
}
Code example #2
    virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) {

        if(!is_valid()) throw std::runtime_error(error_);

        for(auto wi : workflow_layer) wi = nullptr;
        for(auto wb : workflow_layer_factor) wb = nullptr;

        this->di = _di;




        // create and populate nn:data factors (weights and biases) for successive layers

        workflow_layer_factor[mean_factor] = new nn::data<float>(img_size,img_size,3);
        nn_data_populate(workflow_layer_factor[mean_factor],104.007f,122.679f);

        workflow_layer_factor[conv1_weights] = new nn::data<float>(11,11,3,96);
        nn_data_populate(workflow_layer_factor[conv1_weights],-0.374f,0.403f);

        workflow_layer_factor[conv1_biases] = new nn::data<float>(96);
        nn_data_populate(workflow_layer_factor[conv1_biases],-0.854f,0.232f);

        workflow_layer_factor[conv2_1_weights] = new nn::data<float>(5,5,48,128);
        nn_data_populate(workflow_layer_factor[conv2_1_weights],-0.285f,0.379f);

        workflow_layer_factor[conv2_1_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv2_1_biases],0.974f,1.034f);

        workflow_layer_factor[conv2_2_weights] = new nn::data<float>(5,5,48,128);
        nn_data_populate(workflow_layer_factor[conv2_2_weights],-0.269f,0.416f);

        workflow_layer_factor[conv2_2_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv2_2_biases],0.958f,1.027f);

        workflow_layer_factor[conv3_weights] = new nn::data<float>(3,3,256,384);
        nn_data_populate(workflow_layer_factor[conv3_weights],-0.185f,0.512f);

        workflow_layer_factor[conv3_biases] = new nn::data<float>(384);
        nn_data_populate(workflow_layer_factor[conv3_biases],-0.104f,0.093f);

        workflow_layer_factor[conv4_1_weights] = new nn::data<float>(3,3,192,192);
        nn_data_populate(workflow_layer_factor[conv4_1_weights],-0.103f,0.322f);

        workflow_layer_factor[conv4_1_biases] = new nn::data<float>(192);
        nn_data_populate(workflow_layer_factor[conv4_1_biases],0.844f,1.142f);

        workflow_layer_factor[conv4_2_weights] = new nn::data<float>(3,3,192,192);
        nn_data_populate(workflow_layer_factor[conv4_2_weights],-0.142f,0.353f);

        workflow_layer_factor[conv4_2_biases] = new nn::data<float>(192);
        nn_data_populate(workflow_layer_factor[conv4_2_biases],0.77f,1.219f);

        workflow_layer_factor[conv5_1_weights] = new nn::data<float>(3,3,192,128);
        nn_data_populate(workflow_layer_factor[conv5_1_weights],-0.092f,0.254f);

        workflow_layer_factor[conv5_1_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv5_1_biases],0.723f,1.50f);

        workflow_layer_factor[conv5_2_weights] = new nn::data<float>(3,3,192,128);
        nn_data_populate(workflow_layer_factor[conv5_2_weights],-0.133f,0.315f);

        workflow_layer_factor[conv5_2_biases] = new nn::data<float>(128);
        nn_data_populate(workflow_layer_factor[conv5_2_biases],0.623f,1.742f);

        workflow_layer_factor[fc6_weights] = new nn::data<float>(6,6,256,4096);
        nn_data_populate(workflow_layer_factor[fc6_weights],-0.035f,0.048f);

        workflow_layer_factor[fc6_biases] = new nn::data<float>(4096);
        nn_data_populate(workflow_layer_factor[fc6_biases],0.92f,1.057f);

        workflow_layer_factor[fc7_weights] = new nn::data<float>(4096,4096);
        nn_data_populate(workflow_layer_factor[fc7_weights],-0.032f,0.052f);

        workflow_layer_factor[fc7_biases] = new nn::data<float>(4096);
        nn_data_populate(workflow_layer_factor[fc7_biases],0.741f,1.26f);

        workflow_layer_factor[fc8_weights] = new nn::data<float>(4096,1000);
        nn_data_populate(workflow_layer_factor[fc8_weights],-0.045f,0.067f);

        workflow_layer_factor[fc8_biases] = new nn::data<float>(1000);
        nn_data_populate(workflow_layer_factor[fc8_biases],-0.351f,0.425f);

        di->workflow_create_function(&workflow,1,1);
        // ------------------------------------------------------------------------------------------
        // STAGE 0 (input)
        //         output: 227x227x3
        {
            di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1);

            workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
            workflow_layer[input]->arguments.input.index = 0;
            workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 0 (imagenet_mean_subtract)
        //         output: 227x227x3
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0};
            di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1);

            workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC;
            workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = workflow_layer_factor[mean_factor];
            workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION;

            workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 01
        //           convo: 11x11 stride 4x4; ReLU; output: 55x55x96
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 27x27x96
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[mean_substract],0};
            di->workflow_item_create_function(&workflow_layer[conv1],1,&inputs_descriptor,1);

            workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv1]->name = "c1";

            workflow_layer[conv1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
            workflow_layer[conv1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[conv1]->arguments.forward_convolution.weights = workflow_layer_factor[conv1_weights];
            workflow_layer[conv1]->arguments.forward_convolution.biases = workflow_layer_factor[conv1_biases];

            workflow_layer[conv1]->arguments.forward_convolution.center_offset[0] = 0;
            workflow_layer[conv1]->arguments.forward_convolution.center_offset[1] = 0;

            workflow_layer[conv1]->arguments.forward_convolution.stride[0] = 4;
            workflow_layer[conv1]->arguments.forward_convolution.stride[1] = 4;

            workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv1]->output_format[0].format_3d ={{55,55,96}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv1],0};
            di->workflow_item_create_function(&workflow_layer[pool1],1,&inputs_descriptor,1);

            workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool1]->name = "p1";

            workflow_layer[pool1]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
            workflow_layer[pool1]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool1]->arguments.forward_pooling.size[1] = 3;
            workflow_layer[pool1]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool1]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool1]->output_format[0].format_3d ={{27,27,96}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool1],0};
            di->workflow_item_create_function(&workflow_layer[norm1],1,&inputs_descriptor,1);

            workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
            workflow_layer[norm1]->name = "lrn1";

            workflow_layer[norm1]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
            workflow_layer[norm1]->arguments.forward_normalization.normalization.k = 1; // in Krishevsky's article is 2
            workflow_layer[norm1]->arguments.forward_normalization.normalization.n = 5;
            workflow_layer[norm1]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // in Krishevsky's paper is 1e-4,
            // but didn't write that sum of the squares
            // is divided by number of elements (n)
            workflow_layer[norm1]->arguments.forward_normalization.normalization.beta = 0.75f;

            workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm1]->output_format[0].format_3d ={{27,27,96}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 02
        //           split: 2 (z-axis 96/2); output 27x27x(2*96/2)
        //           convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //            norm: RESPONSE_ACROSS_MAPS
        //          output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
            di->workflow_item_create_function(&workflow_layer[subv1_1],1,&inputs_descriptor,1); // view g1

            workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_1]->output_format[0].format_3d ={{27,27,96/2}};

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
            di->workflow_item_create_function(&workflow_layer[subv1_2],1,&inputs_descriptor,1);   // view g2

            workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv1_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv1_2]->arguments.view.origin[2] = (96/2);

            workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv1_2]->output_format[0].format_3d ={{27,27,96/2}};
        }

        // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_1],0};
            di->workflow_item_create_function(&workflow_layer[conv2_1],1,&inputs_descriptor,1);

            workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv2_1]->name = "c2g1";

            workflow_layer[conv2_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_1_weights];
            workflow_layer[conv2_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_1_biases];

            workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[0] = 2;
            workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[1] = 2;

            workflow_layer[conv2_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv2_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_1]->output_format[0].format_3d ={{27,27,256/2}};
        }

        // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_2],0};
            di->workflow_item_create_function(&workflow_layer[conv2_2],1,&inputs_descriptor,1);

            workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv2_2]->name = "c2g2";

            workflow_layer[conv2_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv2_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv2_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_2_weights];
            workflow_layer[conv2_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_2_biases];

            workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[0] = 2;
            workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[1] = 2;

            workflow_layer[conv2_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv2_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv2_2]->output_format[0].format_3d ={{27,27,256/2}};
        }

        // merge g1 and g2
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv2_1],0},{workflow_layer[conv2_2],0}};
            di->workflow_item_create_function(&workflow_layer[merge2],2,inputs_descriptor,1);

            workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge2]->output_format[0].format_3d ={{27,27,256}};

        }

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge2],0};
            di->workflow_item_create_function(&workflow_layer[pool2],1,&inputs_descriptor,1); // pooling

            workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool2]->name = "p2";

            workflow_layer[pool2]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

            workflow_layer[pool2]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool2]->arguments.forward_pooling.size[1] = 3;

            workflow_layer[pool2]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool2]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool2]->output_format[0].format_3d ={{13,13,256}};
        }

        //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool2],0};
            di->workflow_item_create_function(&workflow_layer[norm2],1,&inputs_descriptor,1);

            workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
            workflow_layer[norm2]->name = "lrn2";

            workflow_layer[norm2]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
            workflow_layer[norm2]->arguments.forward_normalization.normalization.k = 1;              // |
            workflow_layer[norm2]->arguments.forward_normalization.normalization.n = 5;              // |
            workflow_layer[norm2]->arguments.forward_normalization.normalization.alpha = 0.0001f/5;  // > see coment at wrkflwi_stage_1_norm
            workflow_layer[norm2]->arguments.forward_normalization.normalization.beta = 0.75f;       // |

            workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[norm2]->output_format[0].format_3d ={{13,13,256}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 03
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x384
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm2],0};
            di->workflow_item_create_function(&workflow_layer[conv3],1,&inputs_descriptor,1);

            workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv3]->name = "c3";
            workflow_layer[conv3]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv3]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv3]->arguments.forward_convolution.weights = workflow_layer_factor[conv3_weights];
            workflow_layer[conv3]->arguments.forward_convolution.biases = workflow_layer_factor[conv3_biases];

            workflow_layer[conv3]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv3]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv3]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv3]->output_format[0].format_3d ={{13,13,384}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 04
        //           split: 2 (z-axis 384/2)
        //           convo: 3x3 stride 1x1; ReLU; 0-padded
        //          output: 13x13x(2*384/2) (continue split to next stage)
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
            di->workflow_item_create_function(&workflow_layer[subv3_1],1,&inputs_descriptor,1); // view g1

            workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_1]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_1]->arguments.view.origin[2] = 0;

            workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_1]->output_format[0].format_3d ={{13,13,384/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
            di->workflow_item_create_function(&workflow_layer[subv3_2],1,&inputs_descriptor,1); // view g2

            workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW;
            workflow_layer[subv3_2]->arguments.view.origin[0] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[1] = 0;
            workflow_layer[subv3_2]->arguments.view.origin[2] = 384/2;

            workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[subv3_2]->output_format[0].format_3d ={{13,13,384/2}};

        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_1],0};
            di->workflow_item_create_function(&workflow_layer[conv4_1],1,&inputs_descriptor,1); // conv g1

            workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv4_1]->name = "c4g1";

            workflow_layer[conv4_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights];
            workflow_layer[conv4_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_1_biases];

            workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv4_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv4_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_1]->output_format[0].format_3d ={{13,13,384/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_2],0};
            di->workflow_item_create_function(&workflow_layer[conv4_2],1,&inputs_descriptor,1); // conv g2

            workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv4_2]->name = "c4g2";

            workflow_layer[conv4_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv4_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv4_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights];
            workflow_layer[conv4_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_2_biases];

            workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv4_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv4_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv4_2]->output_format[0].format_3d ={{13,13,384/2}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 05
        //           convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2)
        //           merge: (z-axis)
        //         maxpool: 3x3 stride 2x2;
        //          output: 13x13x256
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_1],0};
            di->workflow_item_create_function(&workflow_layer[conv5_1],1,&inputs_descriptor,1); // conv g1

            workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv5_1]->name = "c5g1";

            workflow_layer[conv5_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_1_weights];
            workflow_layer[conv5_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_1_biases];

            workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv5_1]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv5_1]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_1]->output_format[0].format_3d ={{13,13,256/2}};
        }

        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_2],0};
            di->workflow_item_create_function(&workflow_layer[conv5_2],1,&inputs_descriptor,1); // conv g2

            workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
            workflow_layer[conv5_2]->name = "c5g2";

            workflow_layer[conv5_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
            workflow_layer[conv5_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;

            workflow_layer[conv5_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_2_weights];
            workflow_layer[conv5_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_2_biases];

            workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[1] = 1;

            workflow_layer[conv5_2]->arguments.forward_convolution.stride[0] = 1;
            workflow_layer[conv5_2]->arguments.forward_convolution.stride[1] = 1;

            workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[conv5_2]->output_format[0].format_3d ={{13,13,256/2}};
        }

        // merge g1 and g2
        {
            nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}};
            di->workflow_item_create_function(&workflow_layer[merge5],2,inputs_descriptor,1);

            workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE;
            workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis

            workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[merge5]->output_format[0].format_3d ={{13,13,256}};
        }

        // maxpool: 3x3 stride 2x2;
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge5],0};
            di->workflow_item_create_function(&workflow_layer[pool5],1,&inputs_descriptor,1); // pooling

            workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_POOLING;
            workflow_layer[pool5]->name = "p5";

            workflow_layer[pool5]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;

            workflow_layer[pool5]->arguments.forward_pooling.size[0] = 3;
            workflow_layer[pool5]->arguments.forward_pooling.size[1] = 3;

            workflow_layer[pool5]->arguments.forward_pooling.stride[0] = 2;
            workflow_layer[pool5]->arguments.forward_pooling.stride[1] = 2;

            workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D;
            workflow_layer[pool5]->output_format[0].format_3d ={{6,6,256}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 06
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool5],0};
            di->workflow_item_create_function(&workflow_layer[fc6],1,&inputs_descriptor,1);

            workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc6]->name = "fc6";

            workflow_layer[fc6]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc6]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc6_weights];
            workflow_layer[fc6]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc6_biases];

            workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc6]->output_format[0].format_1d ={{4096}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 07
        //            full: ReLU
        //          output: 4096
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc6],0};
            di->workflow_item_create_function(&workflow_layer[fc7],1,&inputs_descriptor,1);

            workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc7]->name = "fc7";
            workflow_layer[fc7]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;

            workflow_layer[fc7]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc7_weights];
            workflow_layer[fc7]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc7_biases];

            workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc7]->output_format[0].format_1d ={{4096}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 08
        //            full: ;
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc7],0};
            di->workflow_item_create_function(&workflow_layer[fc8],1,&inputs_descriptor,1);

            workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
            workflow_layer[fc8]->name = "fc8";

            workflow_layer[fc8]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;

            workflow_layer[fc8]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc8_weights];
            workflow_layer[fc8]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc8_biases];

            workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[fc8]->output_format[0].format_1d ={{1000}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 09 (softmax)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc8],0};
            di->workflow_item_create_function(&workflow_layer[softmax],1,&inputs_descriptor,1);

            workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX;

            workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[softmax]->output_format[0].format_1d ={{1000}};
        }

        // ------------------------------------------------------------------------------------------
        // STAGE 10 (output)
        //          output: 1000
        {
            nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[softmax],0};
            di->workflow_item_create_function(&workflow_layer[output],1,&inputs_descriptor,1);

            workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;

            workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
            workflow_layer[output]->output_format[0].format_1d ={{1000}};

        }

        // -------------------------------------------------------------------------------------------
        // END of workflow stages definition
        // -------------------------------------------------------------------------------------------
        workflow->input[0] = workflow_layer[input];
        workflow->output[0] = workflow_layer[output];
        // -------------------------------------------------------------------------------------------

        return workflow;
    }
Code example #3
0
bool test_softmax_float_cpu_random::run() {
    // Runs the softmax workflow for batch sizes {1, 8, 48}, compares the
    // workload output with a naive CPU reference (cpu_layer_softmax) and
    // records per-batch timing in tests_results.
    // Returns true only if every batch matched the reference and done() succeeded.
    bool  run_ok = true;
    test_measurement_result   run_result;
    run_result.description = "RUN SUMMARY: " + test_description;

    C_time_control  run_timer;

    std::cout << "-> Testing: " << test_description << std::endl;

    try {
        if( !init() ) throw std::runtime_error( "init() returns false so can't run test" );
        run_timer.tick();   //start time measurement
        run_result << std::string( "run test with " + current_tested_device->get_device_description() );

        NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH;
        NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH;

        const int softmax_size = 1000;
        for( auto batch : { 1, 8, 48 } ) {
            // ---------------------------------------------------------------------------------------------------------
            {   // simple sample pattern of test with time measuring:
                bool local_ok = true;
                test_measurement_result local_result;
                local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description;
                C_time_control  local_timer;
                // begin local test

                // operator new throws std::bad_alloc on failure (caught below as
                // std::exception), so no nullptr checks are needed here.
                auto input = new nn::data<float>( softmax_size, batch );
                auto workload_output = new nn::data<float>( softmax_size, batch );

                nn_data_populate( workload_output, 0.0f );

                nn_data_populate( input, 0.0f, 20.0f );

                nn_workload_t *workload = nullptr;
                nn_data_t *input_array[1] = { input };
                nn::data<float> *output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) };

                auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch );
                if( !workload ) {
                    // free the local buffers before leaving the loop body via exception
                    delete input;
                    delete workload_output;
                    throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch )
                                              + " status: " + std::to_string( status ) );
                }

                di->workload_execute_function( workload, reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status );

                auto naive_output = cpu_layer_softmax( input );

                local_ok = compare_data(workload_output, naive_output);

                // end of local test
                // summary:
                local_timer.tock();
                local_result.time_consumed = local_timer.get_time_diff();
                local_result.clocks_consumed = local_timer.get_clocks_diff();
                local_result.passed = local_ok;
                tests_results << local_result;

                run_ok = run_ok && local_ok;

                delete input;            // deleting nullptr is a no-op, guards unnecessary
                delete workload_output;
                delete naive_output;
                // Workloads are created by the device interface, so they must be
                // released through it as well; plain `delete` on the opaque
                // nn_workload_t* is not valid (see the sibling caffe test).
                if( workload ) di->workload_delete_function( workload );

            } // The pattern, of complex instruction above, can be multiplied
            // END of run tests
            // ---------------------------------------------------------------------------------------------------------
        }
    } catch( std::runtime_error &error ) {
        run_result << "error: " + std::string( error.what() );
        run_ok = false;
    } catch( std::exception &error ) {
        run_result << "error: " + std::string( error.what() );
        run_ok = false;
    } catch( ... ) {
        run_result << "unknown error";
        run_ok = false;
    }

    run_timer.tock();
    run_result.time_consumed = run_timer.get_time_diff();
    run_result.clocks_consumed = run_timer.get_clocks_diff();

    run_result.passed = run_ok;
    tests_results << run_result;
    if( !done() ) run_ok = false;
    std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;
    return run_ok;
}
Code example #4
0
bool test_caffe_float_workload_cpu_time::run()
{
    // Timing-only test: executes the compiled caffe workflow `loops` times for
    // batch sizes {1, 8, 48} and records min/avg/max wall time and clocks
    // (normalized per image, i.e. divided by batch). No output validation is
    // performed here, so run_ok reflects only error-free execution + done().
    bool  run_ok = true;
    test_measurement_result   run_result;
    run_result.description = "RUN SUMMARY: " + test_description;

    C_time_control  run_timer;

    std::cout << "-> Testing: " << test_description << std::endl;

    try {
        if(!init()) throw std::runtime_error("error: init() returns false so can't run test");
        run_timer.tick();   //start time measurement
        run_result << std::string("run test with " + current_tested_device->get_device_description());
        // ---------------------------------------------------------------------------------------------------------
        for(uint16_t batch :{1,8,48})
        {

            std::vector<uint64_t>     time_diffs;
            std::vector<uint64_t>     clock_diffs;

            // img_size is a class member; assumes 3-channel (RGB) input — TODO confirm
            nn::data<float,4>        *images = new nn::data<float,4>(img_size,img_size,3,batch);
            nn_data_populate(nn::data_cast<float,0>(images),0.0f,255.0f);
            nn_data_t *input_array[1] ={images};

            auto workload_output = new nn::data<float, 2>(1000, batch);
            nn::data<float> *output_array_cmpl[1] ={ nn::data_cast<float, 0>(workload_output) };

            nn_workload_t             *workload = nullptr;

            // compiling workload
            NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_ZXY_BATCH;
            NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH;


            auto status = di->workflow_compile_function(&workload,di->device,workflow,&input_format,&output_format,batch);
            if(!workload) {
                // free the local buffers before leaving the loop body via exception
                delete images;
                delete workload_output;
                throw std::runtime_error("workload compilation failed for batch = " + std::to_string(batch)
                                         + " status: " + std::to_string(status));
            }

            test_measurement_result local_result;
            local_result.description = "RUN PART: (batch " + std::to_string(batch)+") execution of " + test_description;
            local_result.loops = loops;

            // begin local test
            for(auto i = 0; i< loops; ++i)
            {
                NN_API_STATUS   status;
                C_time_control  loop_timer;
                di->workload_execute_function(workload,reinterpret_cast<void**>(input_array),reinterpret_cast<void**>(output_array_cmpl),&status);
                loop_timer.tock();
                // normalize per image so batches are comparable
                time_diffs.push_back(loop_timer.get_time_diff()/batch);
                clock_diffs.push_back(loop_timer.get_clocks_diff()/batch);
            }

            // end of local test
            // summary:
            uint64_t  min_value = *std::min_element(time_diffs.begin(),time_diffs.end());
            local_result.time_consumed = std::accumulate(time_diffs.begin(),time_diffs.end(),0.0)/time_diffs.size();
            local_result.time_consumed_min = min_value;
            local_result.time_consumed_max = *std::max_element(time_diffs.begin(),time_diffs.end());

            local_result << std::string("note: The shortest time for one image obtained from the chrono: "
                                        + C_time_control::time_diff_string(min_value));
            local_result << std::string("note: Values of time's and clock's were divided by current value of batch: "+std::to_string(batch));

            local_result.clocks_consumed = std::accumulate(clock_diffs.begin(),clock_diffs.end(),0.0)/clock_diffs.size();
            local_result.clocks_consumed_min = *std::min_element(clock_diffs.begin(),clock_diffs.end());
            local_result.clocks_consumed_max = *std::max_element(clock_diffs.begin(),clock_diffs.end());

            // timing-only part: reaching this point without an exception counts as a pass
            local_result.passed = true;

            tests_results << local_result;
            delete images;            // deleting nullptr is a no-op, guards unnecessary
            delete workload_output;
            if(workload != nullptr) di->workload_delete_function(workload);
        }
        // ---------------------------------------------------------------------------------------------------------
        run_ok = true;
    }
    catch(std::runtime_error &error) {
        run_result << "error: " + std::string(error.what());
        run_ok = false;
    }
    catch(std::exception &error) {
        // also report std::bad_alloc & co. instead of falling into "unknown"
        run_result << "error: " + std::string(error.what());
        run_ok = false;
    }
    catch(...) {
        run_result << "error: unknown";
        run_ok = false;
    }

    run_timer.tock();
    run_result.time_consumed = run_timer.get_time_diff();
    run_result.clocks_consumed = run_timer.get_clocks_diff();

    run_result.passed = run_ok;
    tests_results << run_result;
    if (!done()) run_ok=false;
    std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;
    return run_ok;
}
Code example #5
0
File: test_view.cpp  Project: Yeongjae/idlf
bool test_view::run() {
    bool  run_ok = true;
    test_measurement_result   run_result;
    run_result.description = "RUN SUMMARY: " + test_description;

    C_time_control  run_timer;

    std::cout << "-> Testing: " << test_description << std::endl;
    try {
        if( !init() ) throw std::runtime_error( "init() returns false so can't run test" );
        run_timer.tick();   //start time measurement
        run_result << std::string( "run test with " + current_tested_device->get_device_description() );

        NN_WORKLOAD_DATA_TYPE input_format  = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH;
        NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH;

        std::mt19937 generator( 1 );
        std::uniform_int_distribution<uint32_t> distribution( 0, 56/2 );

        auto compare_data = [](nn::workload_data<nn::layout_f32>& item, nn::data<float>& ref_item) {
                float relative_error_threshold = 1e-3f,
                      absolute_error_threshold = 1e-6f,
                      absoulte_error_limit     = 1e-4f;

                uint32_t size_n = item.get_length(0),
                         size_x = item.get_length(1),
                         size_y = item.get_length(2),
                         size_z = item.get_length(3);

                for(uint32_t n = 0; n < size_n; ++n)
                    for(uint32_t z = 0; z < size_z; ++z)
                        for( uint32_t y = 0; y < size_y; ++y )
                            for( uint32_t x = 0; x < size_x; ++x ) {
                                float workload_val = item.at(n, x, y, z, 0, 0);
                                float ref_val      = ref_item.at(z, x, y, n);

                                 if( fabs(workload_val) < absoulte_error_limit) {
                                    if(fabs( workload_val - ref_val ) > absolute_error_threshold) {
                                        return false;
                                    }
                                } else
                                    if(fabs(workload_val - ref_val) / fabs(ref_val) > relative_error_threshold)
                                        return false;
                            }
            return true;
        };

        for( uint32_t batch : { 1, 8, 48 } ) {
            // simple sample pattern of test with time measuring:
            bool local_ok = true;
            test_measurement_result local_result;
            local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description;
            C_time_control  local_timer;

            for(uint32_t size_x : { 5,16,56 }) {
                for(uint32_t size_y : { 5,16,56 }) {
                    for(uint32_t size_z : { 1,8,16 }) {
                        // ---------------------------------------------------------------------------------------------------------
                        // begin local test
                        auto input = new nn::data<float>(size_z,size_x,size_y,batch);
                        if(input == nullptr)   throw std::runtime_error("unable to create input nn::data for batch = " +std::to_string(batch));

                        nn_data_populate(input,-100.0f,100.0f);
                        auto wrkld_data = new nn::workload_data<nn::layout_f32>(input->buffer,
                                                                                {batch,size_x,size_y,size_z,1,1},
                                                                                 nn::data_helper_layout_lookup_zxynpq<float>()
                                                                               );
                        if(wrkld_data == nullptr) {
                            delete input;
                            throw std::runtime_error("unable to create wrkld_data for batch = " +std::to_string(batch));
                        }

                        nn_workload_data_coords_t* view_begin_coords,*view_end_coords;
                        { // create random view
                            view_begin_coords = new nn_workload_data_coords_t{
                                distribution(generator) % batch,
                                distribution(generator) % size_x,
                                distribution(generator) % size_y,
                                distribution(generator) % size_z,
                                0,
                                0
                            };
                            if(view_begin_coords == nullptr) {
                                delete input;
                                delete wrkld_data;
                                throw std::runtime_error("unable to create view_begin_coords for batch = " +std::to_string(batch));
                            }

                            view_end_coords  = new nn_workload_data_coords_t{
                                distribution(generator) % batch,
                                distribution(generator) % size_x,
                                distribution(generator) % size_y,
                                distribution(generator) % size_z,
                                0,
                                0
                            };
                            if(view_end_coords == nullptr) {
                                delete input;
                                delete wrkld_data;
                                delete view_begin_coords;
                                throw std::runtime_error("unable to create view_end_coords for batch = " +std::to_string(batch));
                            }

                            for(int i = 0 ; i <= 4 ; ++i)
                                if(view_begin_coords->t[i] > view_end_coords->t[i]) {
                                    std::swap(view_begin_coords->t[i],view_end_coords->t[i]);
                                }
                        }

                        // create view
                        auto workload_output = new nn::workload_data<nn::layout_f32>(*wrkld_data,*view_begin_coords,*view_end_coords);
                        if(workload_output == nullptr) {
                            delete input;
                            delete wrkld_data;
                            delete view_begin_coords;
                            delete view_end_coords;
                            delete workload_output;
                            throw std::runtime_error("unable to create workload_output nn::workload_data for batch = " +std::to_string(batch));
                        }

                        // naive view
                        auto naive_output = naive_view(*input,*view_begin_coords,*view_end_coords);

                        local_ok = compare_data(*workload_output,*naive_output);

                        if(input)            delete input;
                        if(workload_output)  delete workload_output;
                        if(naive_output)     delete naive_output;
                        if(view_begin_coords)delete view_begin_coords;
                        if(view_end_coords)  delete view_end_coords;
                        if(wrkld_data)       delete wrkld_data;
                        // END of run tests
                        // ---------------------------------------------------------------------------------------------------------
                    } // The pattern, of complex instruction above, can be multiplied
                }
            }
            // end of local test
            // summary:
            local_timer.tock();
            local_result.time_consumed = local_timer.get_time_diff();
            local_result.clocks_consumed = local_timer.get_clocks_diff();
            local_result.passed = local_ok;
            tests_results << local_result;

            run_ok = run_ok && local_ok;
        }
    }
    catch(std::runtime_error &error) {
        run_result << "error: " + std::string(error.what());
        run_ok = false;
    }
    catch(std::exception &error) {
        run_result << "error: " + std::string(error.what());
        run_ok = false;
    }
    catch(...) {
        run_result << "unknown error";
        run_ok = false;
    }
    run_timer.tock();
    run_result.time_consumed = run_timer.get_time_diff();
    run_result.clocks_consumed = run_timer.get_clocks_diff();

    run_result.passed = run_ok;
    tests_results << run_result;
    if(!done()) run_ok = false;
    std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;
    return run_ok;
}