bool test_convolution_float_cpu_random::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; std::cout << "-> Testing: " << test_description << std::endl; try { if(!init()) throw std::runtime_error( "init() returns false so can't run test" ); NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; for(uint32_t batch : { 1, 8, 48 }) { bool local_ok = true; test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; C_time_control local_timer; // begin local test uint32_t z = 2, img_size = 227, num_features_map = 8; nn::data<float, 4> *images = new nn::data<float, 4>( img_size, img_size, z, batch ); if(images == nullptr) throw std::runtime_error("Cant't create images nn::data"); nn_data_populate( nn::data_cast<float, 0>(images), 0.0f, 255.0f ); nn::data<float, 4> *images_with_padding = new nn::data<float, 4>( img_size + 2, img_size + 2, z, batch ); if(images_with_padding == nullptr) { delete images; throw std::runtime_error("Cant't create images_with_padding nn::data"); } { // padding for input for naive method nn_data_populate( nn::data_cast<float, 0>(images_with_padding), 0.0f ); for(uint32_t tmp_batch = 0; tmp_batch < batch; ++tmp_batch) for(uint32_t tmp_z = 0; tmp_z < z; ++tmp_z) for(uint32_t y = 0; y < img_size; ++y) for(uint32_t x = 0; x < img_size; ++x) images_with_padding->at( x, y, tmp_z, tmp_batch ) = images->at( x, y, tmp_z, tmp_batch ); } nn_workload_t *workload = nullptr; nn_data_t *input_array[1] = { images }; auto workload_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch ); if(workload_output==nullptr) { delete images; delete images_with_padding; throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch)); } nn::data<float> 
*output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) }; auto naive_output = new nn::data<float, 4>( img_size, img_size, num_features_map, batch ); if(naive_output==nullptr) { delete images; delete images_with_padding; delete workload_output; throw std::runtime_error("unable to create naive_output for batch = " +std::to_string(batch)); } auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch ); if(!workload) throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch ) + " status: " + std::to_string( status ) ); test_measurement_result run_result; run_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; // changing order needed //di->workload_execute_function( workload, reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status ); float* biases = nullptr; float* weights = nullptr; { // read biases and weights if(NN_WORK_ITEM_TYPE_CONVOLUTION == workflow->input[0]->use[0].item->type) { auto tmp = reinterpret_cast<nn_arguments_forward_convolution_t*>(&workflow->input[0]->use[0].item->arguments); biases = reinterpret_cast<float*>(tmp->biases->buffer); weights = reinterpret_cast<float*>(tmp->weights->buffer); } } if(nullptr == biases || nullptr == weights) throw std::runtime_error( "reading weight or biases for naive version failed for batch = " + std::to_string( batch ) ); naive_convolv_float_implementation( reinterpret_cast<float*>(images_with_padding->buffer), reinterpret_cast<float*>(naive_output->buffer), biases, weights, batch, num_features_map, z, img_size, img_size, img_size + 2, img_size + 2, 3, 3, 1, 1, NN_ACTIVATION_FUNCTION_RELU ); //local_ok = compare_4d_data( workload_output, naive_output ); local_ok = true; // BLIND TEST // end of local test // summary: local_timer.tock(); local_result.time_consumed = local_timer.get_time_diff(); local_result.clocks_consumed = 
local_timer.get_clocks_diff(); local_result.passed = local_ok; tests_results << local_result; run_ok = run_ok && local_ok; if(workload_output) delete workload_output; if(naive_output) delete naive_output; if(images) delete images; if(images_with_padding) delete images_with_padding; } } catch(std::runtime_error &error) { tests_results << run_result; std::cout << "error: " << error.what() << std::endl; } catch(std::exception &error) { tests_results << run_result; std::cout << "error: " << error.what() << std::endl; } catch(...) { tests_results << run_result; std::cout << "error: unknown" << std::endl; } if(!done()) run_ok = false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
virtual nn_workflow_t *init_test_workflow(nn_device_interface_0_t *_di) {
    // Builds the AlexNet-style workflow used by the tests:
    // input -> mean-subtract -> conv1/pool1/norm1 -> split conv2 -> merge/pool2/norm2
    // -> conv3 -> split conv4 -> split conv5 -> merge/pool5 -> fc6 -> fc7 -> fc8
    // -> softmax -> output. Returns the assembled workflow.
    // Throws std::runtime_error when the fixture is in an invalid state.

    if(!is_valid()) throw std::runtime_error(error_);

    // BUGFIX: iterate by reference. The original loops used
    // `for(auto wi : ...)`, which copies each pointer, so assigning nullptr
    // changed only the local copy and left the arrays untouched.
    for(auto &wi : workflow_layer) wi = nullptr;
    for(auto &wb : workflow_layer_factor) wb = nullptr;

    this->di = _di;

    // create and populate nn:data factors (weights and biases) for successive layers
    workflow_layer_factor[mean_factor] = new nn::data<float>(img_size,img_size,3);
    nn_data_populate(workflow_layer_factor[mean_factor],104.007f,122.679f);

    workflow_layer_factor[conv1_weights] = new nn::data<float>(11,11,3,96);
    nn_data_populate(workflow_layer_factor[conv1_weights],-0.374f,0.403f);
    workflow_layer_factor[conv1_biases] = new nn::data<float>(96);
    nn_data_populate(workflow_layer_factor[conv1_biases],-0.854f,0.232f);

    workflow_layer_factor[conv2_1_weights] = new nn::data<float>(5,5,48,128);
    nn_data_populate(workflow_layer_factor[conv2_1_weights],-0.285f,0.379f);
    workflow_layer_factor[conv2_1_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv2_1_biases],0.974f,1.034f);

    workflow_layer_factor[conv2_2_weights] = new nn::data<float>(5,5,48,128);
    nn_data_populate(workflow_layer_factor[conv2_2_weights],-0.269f,0.416f);
    workflow_layer_factor[conv2_2_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv2_2_biases],0.958f,1.027f);

    workflow_layer_factor[conv3_weights] = new nn::data<float>(3,3,256,384);
    nn_data_populate(workflow_layer_factor[conv3_weights],-0.185f,0.512f);
    workflow_layer_factor[conv3_biases] = new nn::data<float>(384);
    nn_data_populate(workflow_layer_factor[conv3_biases],-0.104f,0.093f);

    workflow_layer_factor[conv4_1_weights] = new nn::data<float>(3,3,192,192);
    nn_data_populate(workflow_layer_factor[conv4_1_weights],-0.103f,0.322f);
    workflow_layer_factor[conv4_1_biases] = new nn::data<float>(192);
    nn_data_populate(workflow_layer_factor[conv4_1_biases],0.844f,1.142f);

    workflow_layer_factor[conv4_2_weights] = new nn::data<float>(3,3,192,192);
    nn_data_populate(workflow_layer_factor[conv4_2_weights],-0.142f,0.353f);
    workflow_layer_factor[conv4_2_biases] = new nn::data<float>(192);
    nn_data_populate(workflow_layer_factor[conv4_2_biases],0.77f,1.219f);

    workflow_layer_factor[conv5_1_weights] = new nn::data<float>(3,3,192,128);
    nn_data_populate(workflow_layer_factor[conv5_1_weights],-0.092f,0.254f);
    workflow_layer_factor[conv5_1_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv5_1_biases],0.723f,1.50f);

    workflow_layer_factor[conv5_2_weights] = new nn::data<float>(3,3,192,128);
    nn_data_populate(workflow_layer_factor[conv5_2_weights],-0.133f,0.315f);
    workflow_layer_factor[conv5_2_biases] = new nn::data<float>(128);
    nn_data_populate(workflow_layer_factor[conv5_2_biases],0.623f,1.742f);

    workflow_layer_factor[fc6_weights] = new nn::data<float>(6,6,256,4096);
    nn_data_populate(workflow_layer_factor[fc6_weights],-0.035f,0.048f);
    workflow_layer_factor[fc6_biases] = new nn::data<float>(4096);
    nn_data_populate(workflow_layer_factor[fc6_biases],0.92f,1.057f);

    workflow_layer_factor[fc7_weights] = new nn::data<float>(4096,4096);
    nn_data_populate(workflow_layer_factor[fc7_weights],-0.032f,0.052f);
    workflow_layer_factor[fc7_biases] = new nn::data<float>(4096);
    nn_data_populate(workflow_layer_factor[fc7_biases],0.741f,1.26f);

    workflow_layer_factor[fc8_weights] = new nn::data<float>(4096,1000);
    nn_data_populate(workflow_layer_factor[fc8_weights],-0.045f,0.067f);
    workflow_layer_factor[fc8_biases] = new nn::data<float>(1000);
    nn_data_populate(workflow_layer_factor[fc8_biases],-0.351f,0.425f);

    di->workflow_create_function(&workflow,1,1);

    // ------------------------------------------------------------------------------------------
    // STAGE 0 (input)
    //         output: 227x227x3
    {
        di->workflow_item_create_function(&workflow_layer[input],0,nullptr,1);
        workflow_layer[input]->type = NN_WORK_ITEM_TYPE_INPUT;
        workflow_layer[input]->arguments.input.index = 0;
        workflow_layer[input]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[input]->output_format[0].format_3d ={{img_size,img_size,3}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 0 (imagenet_mean_subtract)
    //         output: 227x227x3
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[input],0};
        di->workflow_item_create_function(&workflow_layer[mean_substract],1,&inputs_descriptor,1);
        workflow_layer[mean_substract]->type = NN_WORK_ITEM_TYPE_ARITHMETIC;
        workflow_layer[mean_substract]->arguments.forward_arithmetic.factor = workflow_layer_factor[mean_factor];
        workflow_layer[mean_substract]->arguments.forward_arithmetic.arithmetic_function = NN_ARITHMETIC_FUNCTION_SUBTRACTION;
        workflow_layer[mean_substract]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[mean_substract]->output_format[0].format_3d ={{img_size,img_size,3}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 01
    //         convo: 11x11 stride 4x4; ReLU; output: 55x55x96
    //         maxpool: 3x3 stride 2x2;
    //         norm: RESPONSE_ACROSS_MAPS
    //         output: 27x27x96
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[mean_substract],0};
        di->workflow_item_create_function(&workflow_layer[conv1],1,&inputs_descriptor,1);
        workflow_layer[conv1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv1]->name = "c1";
        workflow_layer[conv1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv1]->arguments.forward_convolution.weights = workflow_layer_factor[conv1_weights];
        workflow_layer[conv1]->arguments.forward_convolution.biases = workflow_layer_factor[conv1_biases];
        workflow_layer[conv1]->arguments.forward_convolution.center_offset[0] = 0;
        workflow_layer[conv1]->arguments.forward_convolution.center_offset[1] = 0;
        workflow_layer[conv1]->arguments.forward_convolution.stride[0] = 4;
        workflow_layer[conv1]->arguments.forward_convolution.stride[1] = 4;
        workflow_layer[conv1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv1]->output_format[0].format_3d ={{55,55,96}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv1],0};
        di->workflow_item_create_function(&workflow_layer[pool1],1,&inputs_descriptor,1);
        workflow_layer[pool1]->type = NN_WORK_ITEM_TYPE_POOLING;
        workflow_layer[pool1]->name = "p1";
        workflow_layer[pool1]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
        workflow_layer[pool1]->arguments.forward_pooling.size[0] = 3;
        workflow_layer[pool1]->arguments.forward_pooling.size[1] = 3;
        workflow_layer[pool1]->arguments.forward_pooling.stride[0] = 2;
        workflow_layer[pool1]->arguments.forward_pooling.stride[1] = 2;
        workflow_layer[pool1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[pool1]->output_format[0].format_3d ={{27,27,96}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool1],0};
        di->workflow_item_create_function(&workflow_layer[norm1],1,&inputs_descriptor,1);
        workflow_layer[norm1]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
        workflow_layer[norm1]->name = "lrn1";
        workflow_layer[norm1]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
        workflow_layer[norm1]->arguments.forward_normalization.normalization.k = 1; // in Krishevsky's article is 2
        workflow_layer[norm1]->arguments.forward_normalization.normalization.n = 5;
        workflow_layer[norm1]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // in Krishevsky's paper is 1e-4,
                                                                                                // but didn't write that sum of the squares
                                                                                                // is divided by number of elements (n)
        workflow_layer[norm1]->arguments.forward_normalization.normalization.beta = 0.75f;
        workflow_layer[norm1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[norm1]->output_format[0].format_3d ={{27,27,96}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 02
    //         split: 2 (z-axis 96/2); output 27x27x(2*96/2)
    //         convo: 5x5 stride 1x1; ReLU; 0-padded output: 27x27x(2*256/2)
    //         merge: (z-axis)
    //         maxpool: 3x3 stride 2x2;
    //         norm: RESPONSE_ACROSS_MAPS
    //         output: 13x13x256
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
        di->workflow_item_create_function(&workflow_layer[subv1_1],1,&inputs_descriptor,1); // view g1
        workflow_layer[subv1_1]->type = NN_WORK_ITEM_TYPE_VIEW;
        workflow_layer[subv1_1]->arguments.view.origin[0] = 0;
        workflow_layer[subv1_1]->arguments.view.origin[1] = 0;
        workflow_layer[subv1_1]->arguments.view.origin[2] = 0;
        workflow_layer[subv1_1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[subv1_1]->output_format[0].format_3d ={{27,27,96/2}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm1],0};
        di->workflow_item_create_function(&workflow_layer[subv1_2],1,&inputs_descriptor,1); // view g2
        workflow_layer[subv1_2]->type = NN_WORK_ITEM_TYPE_VIEW;
        workflow_layer[subv1_2]->arguments.view.origin[0] = 0;
        workflow_layer[subv1_2]->arguments.view.origin[1] = 0;
        workflow_layer[subv1_2]->arguments.view.origin[2] = (96/2);
        workflow_layer[subv1_2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[subv1_2]->output_format[0].format_3d ={{27,27,96/2}};
    }

    // convolution 2, g1: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2)
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_1],0};
        di->workflow_item_create_function(&workflow_layer[conv2_1],1,&inputs_descriptor,1);
        workflow_layer[conv2_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv2_1]->name = "c2g1";
        workflow_layer[conv2_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv2_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv2_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_1_weights];
        workflow_layer[conv2_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_1_biases];
        workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[0] = 2;
        workflow_layer[conv2_1]->arguments.forward_convolution.center_offset[1] = 2;
        workflow_layer[conv2_1]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv2_1]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv2_1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv2_1]->output_format[0].format_3d ={{27,27,256/2}};
    }

    // convolution 2, g2: 5x5 stride 1x1; ReLU; 0-padded output: 13x13x(2*96/2)
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv1_2],0};
        di->workflow_item_create_function(&workflow_layer[conv2_2],1,&inputs_descriptor,1);
        workflow_layer[conv2_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv2_2]->name = "c2g2";
        workflow_layer[conv2_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv2_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv2_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv2_2_weights];
        workflow_layer[conv2_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv2_2_biases];
        workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[0] = 2;
        workflow_layer[conv2_2]->arguments.forward_convolution.center_offset[1] = 2;
        workflow_layer[conv2_2]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv2_2]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv2_2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv2_2]->output_format[0].format_3d ={{27,27,256/2}};
    }

    // merge g1 and g2
    {
        nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv2_1],0},{workflow_layer[conv2_2],0}};
        di->workflow_item_create_function(&workflow_layer[merge2],2,inputs_descriptor,1);
        workflow_layer[merge2]->type = NN_WORK_ITEM_TYPE_MERGE;
        workflow_layer[merge2]->arguments.forward_merge.axis = 2; // value 2 for z-axis
        workflow_layer[merge2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[merge2]->output_format[0].format_3d ={{27,27,256}};
    }

    // maxpool: 3x3 stride 2x2;
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge2],0};
        di->workflow_item_create_function(&workflow_layer[pool2],1,&inputs_descriptor,1); // pooling
        workflow_layer[pool2]->type = NN_WORK_ITEM_TYPE_POOLING;
        workflow_layer[pool2]->name = "p2";
        workflow_layer[pool2]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
        workflow_layer[pool2]->arguments.forward_pooling.size[0] = 3;
        workflow_layer[pool2]->arguments.forward_pooling.size[1] = 3;
        workflow_layer[pool2]->arguments.forward_pooling.stride[0] = 2;
        workflow_layer[pool2]->arguments.forward_pooling.stride[1] = 2;
        workflow_layer[pool2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[pool2]->output_format[0].format_3d ={{13,13,256}};
    }

    //norm: RESPONSE_ACROSS_MAPS; output: 13x13x256
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool2],0};
        di->workflow_item_create_function(&workflow_layer[norm2],1,&inputs_descriptor,1);
        workflow_layer[norm2]->type = NN_WORK_ITEM_TYPE_NORMALIZATION;
        workflow_layer[norm2]->name = "lrn2";
        workflow_layer[norm2]->arguments.forward_normalization.normalization.mode = NN_NORMALIZATION_MODE_RESPONSE_ACROSS_MAPS;
        workflow_layer[norm2]->arguments.forward_normalization.normalization.k = 1;             // |
        workflow_layer[norm2]->arguments.forward_normalization.normalization.n = 5;             // |
        workflow_layer[norm2]->arguments.forward_normalization.normalization.alpha = 0.0001f/5; // > see coment at wrkflwi_stage_1_norm
        workflow_layer[norm2]->arguments.forward_normalization.normalization.beta = 0.75f;      // |
        workflow_layer[norm2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[norm2]->output_format[0].format_3d ={{13,13,256}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 03
    //         convo: 3x3 stride 1x1; ReLU; 0-padded
    //         output: 13x13x384
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[norm2],0};
        di->workflow_item_create_function(&workflow_layer[conv3],1,&inputs_descriptor,1);
        workflow_layer[conv3]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv3]->name = "c3";
        workflow_layer[conv3]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv3]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv3]->arguments.forward_convolution.weights = workflow_layer_factor[conv3_weights];
        workflow_layer[conv3]->arguments.forward_convolution.biases = workflow_layer_factor[conv3_biases];
        workflow_layer[conv3]->arguments.forward_convolution.center_offset[0] = 1;
        workflow_layer[conv3]->arguments.forward_convolution.center_offset[1] = 1;
        workflow_layer[conv3]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv3]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv3]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv3]->output_format[0].format_3d ={{13,13,384}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 04
    //         split: 2 (z-axis 384/2)
    //         convo: 3x3 stride 1x1; ReLU; 0-padded
    //         output: 13x13x(2*384/2) (continue split to next stage)
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
        di->workflow_item_create_function(&workflow_layer[subv3_1],1,&inputs_descriptor,1); // view g1
        workflow_layer[subv3_1]->type = NN_WORK_ITEM_TYPE_VIEW;
        workflow_layer[subv3_1]->arguments.view.origin[0] = 0;
        workflow_layer[subv3_1]->arguments.view.origin[1] = 0;
        workflow_layer[subv3_1]->arguments.view.origin[2] = 0;
        workflow_layer[subv3_1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[subv3_1]->output_format[0].format_3d ={{13,13,384/2}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv3],0};
        di->workflow_item_create_function(&workflow_layer[subv3_2],1,&inputs_descriptor,1); // view g2
        workflow_layer[subv3_2]->type = NN_WORK_ITEM_TYPE_VIEW;
        workflow_layer[subv3_2]->arguments.view.origin[0] = 0;
        workflow_layer[subv3_2]->arguments.view.origin[1] = 0;
        workflow_layer[subv3_2]->arguments.view.origin[2] = 384/2;
        workflow_layer[subv3_2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[subv3_2]->output_format[0].format_3d ={{13,13,384/2}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_1],0};
        di->workflow_item_create_function(&workflow_layer[conv4_1],1,&inputs_descriptor,1); // conv g1
        workflow_layer[conv4_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv4_1]->name = "c4g1";
        workflow_layer[conv4_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv4_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv4_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_1_weights];
        workflow_layer[conv4_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_1_biases];
        workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[0] = 1;
        workflow_layer[conv4_1]->arguments.forward_convolution.center_offset[1] = 1;
        workflow_layer[conv4_1]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv4_1]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv4_1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv4_1]->output_format[0].format_3d ={{13,13,384/2}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[subv3_2],0};
        di->workflow_item_create_function(&workflow_layer[conv4_2],1,&inputs_descriptor,1); // conv g2
        workflow_layer[conv4_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv4_2]->name = "c4g2";
        workflow_layer[conv4_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv4_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        // BUGFIX: the original assigned conv4_1_weights here, leaving the
        // allocated-and-populated conv4_2_weights factor unused; conv g2
        // must use its own weights.
        workflow_layer[conv4_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv4_2_weights];
        workflow_layer[conv4_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv4_2_biases];
        workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[0] = 1;
        workflow_layer[conv4_2]->arguments.forward_convolution.center_offset[1] = 1;
        workflow_layer[conv4_2]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv4_2]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv4_2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv4_2]->output_format[0].format_3d ={{13,13,384/2}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 05
    //         convo: 3x3 stride 1x1; ReLU; 0-padded; output: 13x13x(2*256/2)
    //         merge: (z-axis)
    //         maxpool: 3x3 stride 2x2;
    //         output: 13x13x256
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_1],0};
        di->workflow_item_create_function(&workflow_layer[conv5_1],1,&inputs_descriptor,1); // conv g1
        workflow_layer[conv5_1]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv5_1]->name = "c5g1";
        workflow_layer[conv5_1]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv5_1]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv5_1]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_1_weights];
        workflow_layer[conv5_1]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_1_biases];
        workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[0] = 1;
        workflow_layer[conv5_1]->arguments.forward_convolution.center_offset[1] = 1;
        workflow_layer[conv5_1]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv5_1]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv5_1]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv5_1]->output_format[0].format_3d ={{13,13,256/2}};
    }

    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[conv4_2],0};
        di->workflow_item_create_function(&workflow_layer[conv5_2],1,&inputs_descriptor,1); // conv g2
        workflow_layer[conv5_2]->type = NN_WORK_ITEM_TYPE_CONVOLUTION;
        workflow_layer[conv5_2]->name = "c5g2";
        workflow_layer[conv5_2]->arguments.forward_convolution.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[conv5_2]->arguments.forward_convolution.padding = NN_PADDING_MODE_DATA_OR_ZERO;
        workflow_layer[conv5_2]->arguments.forward_convolution.weights = workflow_layer_factor[conv5_2_weights];
        workflow_layer[conv5_2]->arguments.forward_convolution.biases = workflow_layer_factor[conv5_2_biases];
        workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[0] = 1;
        workflow_layer[conv5_2]->arguments.forward_convolution.center_offset[1] = 1;
        workflow_layer[conv5_2]->arguments.forward_convolution.stride[0] = 1;
        workflow_layer[conv5_2]->arguments.forward_convolution.stride[1] = 1;
        workflow_layer[conv5_2]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[conv5_2]->output_format[0].format_3d ={{13,13,256/2}};
    }

    // merge g1 and g2
    {
        nn_workflow_use_descriptor_t inputs_descriptor[] ={{workflow_layer[conv5_1],0},{workflow_layer[conv5_2],0}};
        di->workflow_item_create_function(&workflow_layer[merge5],2,inputs_descriptor,1);
        workflow_layer[merge5]->type = NN_WORK_ITEM_TYPE_MERGE;
        workflow_layer[merge5]->arguments.forward_merge.axis = 2; // value 2 for z-axis
        workflow_layer[merge5]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[merge5]->output_format[0].format_3d ={{13,13,256}};
    }

    // maxpool: 3x3 stride 2x2;
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[merge5],0};
        di->workflow_item_create_function(&workflow_layer[pool5],1,&inputs_descriptor,1); // pooling
        workflow_layer[pool5]->type = NN_WORK_ITEM_TYPE_POOLING;
        workflow_layer[pool5]->name = "p5";
        workflow_layer[pool5]->arguments.forward_pooling.mode = NN_POOLING_MODE_MAX;
        workflow_layer[pool5]->arguments.forward_pooling.size[0] = 3;
        workflow_layer[pool5]->arguments.forward_pooling.size[1] = 3;
        workflow_layer[pool5]->arguments.forward_pooling.stride[0] = 2;
        workflow_layer[pool5]->arguments.forward_pooling.stride[1] = 2;
        workflow_layer[pool5]->output_format[0].format = NN_DATA_FORMAT_3D;
        workflow_layer[pool5]->output_format[0].format_3d ={{6,6,256}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 06
    //         full: ReLU
    //         output: 4096
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[pool5],0};
        di->workflow_item_create_function(&workflow_layer[fc6],1,&inputs_descriptor,1);
        workflow_layer[fc6]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        workflow_layer[fc6]->name = "fc6";
        workflow_layer[fc6]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[fc6]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc6_weights];
        workflow_layer[fc6]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc6_biases];
        workflow_layer[fc6]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[fc6]->output_format[0].format_1d ={{4096}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 07
    //         full: ReLU
    //         output: 4096
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc6],0};
        di->workflow_item_create_function(&workflow_layer[fc7],1,&inputs_descriptor,1);
        workflow_layer[fc7]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        workflow_layer[fc7]->name = "fc7";
        workflow_layer[fc7]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_RELU;
        workflow_layer[fc7]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc7_weights];
        workflow_layer[fc7]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc7_biases];
        workflow_layer[fc7]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[fc7]->output_format[0].format_1d ={{4096}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 08
    //         full: ;
    //         output: 1000
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc7],0};
        di->workflow_item_create_function(&workflow_layer[fc8],1,&inputs_descriptor,1);
        workflow_layer[fc8]->type = NN_WORK_ITEM_TYPE_FULLY_CONNECTED;
        workflow_layer[fc8]->name = "fc8";
        workflow_layer[fc8]->arguments.forward_fully_connected.activation.function = NN_ACTIVATION_FUNCTION_NONE;
        workflow_layer[fc8]->arguments.forward_fully_connected.weights = workflow_layer_factor[fc8_weights];
        workflow_layer[fc8]->arguments.forward_fully_connected.biases = workflow_layer_factor[fc8_biases];
        workflow_layer[fc8]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[fc8]->output_format[0].format_1d ={{1000}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 09 (softmax)
    //         output: 1000
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[fc8],0};
        di->workflow_item_create_function(&workflow_layer[softmax],1,&inputs_descriptor,1);
        workflow_layer[softmax]->type = NN_WORK_ITEM_TYPE_SOFTMAX;
        workflow_layer[softmax]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[softmax]->output_format[0].format_1d ={{1000}};
    }

    // ------------------------------------------------------------------------------------------
    // STAGE 10 (output)
    //         output: 1000
    {
        nn_workflow_use_descriptor_t inputs_descriptor ={workflow_layer[softmax],0};
        di->workflow_item_create_function(&workflow_layer[output],1,&inputs_descriptor,1);
        workflow_layer[output]->type = NN_WORK_ITEM_TYPE_OUTPUT;
        workflow_layer[output]->output_format[0].format = NN_DATA_FORMAT_1D;
        workflow_layer[output]->output_format[0].format_1d ={{1000}};
    }

    // -------------------------------------------------------------------------------------------
    // END of workflow stages definition
    // -------------------------------------------------------------------------------------------
    workflow->input[0] = workflow_layer[input];
    workflow->output[0] = workflow_layer[output];
    // -------------------------------------------------------------------------------------------

    return workflow;
}
bool test_softmax_float_cpu_random::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if( !init() ) throw std::runtime_error( "init() returns false so can't run test" ); run_timer.tick(); //start time measurement run_result << std::string( "run test with " + current_tested_device->get_device_description() ); NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; const int softmax_size = 1000; for( auto batch : { 1, 8, 48 } ) { // --------------------------------------------------------------------------------------------------------- { // simple sample pattern of test with time measuring: bool local_ok = true; test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; C_time_control local_timer; // begin local test auto input = new nn::data<float>( softmax_size, batch ); if(input == nullptr) throw std::runtime_error("unable to create input for batch = " +std::to_string(batch)); auto workload_output = new nn::data<float>( softmax_size, batch ); if(workload_output == nullptr) throw std::runtime_error("unable to create workload_output for batch = " +std::to_string(batch)); nn_data_populate( workload_output, 0.0f ); nn_data_populate( input, 0.0f, 20.0f ); nn_workload_t *workload = nullptr; nn_data_t *input_array[1] = { input }; nn::data<float> *output_array_cmpl[1] = { nn::data_cast<float, 0>(workload_output) }; auto status = di->workflow_compile_function( &workload, di->device, workflow, &input_format, &output_format, batch ); if( !workload ) throw std::runtime_error( "workload compilation failed for batch = " + std::to_string( batch ) + " status: " + std::to_string( status ) ); di->workload_execute_function( workload, 
reinterpret_cast<void**>(input_array), reinterpret_cast<void**>(output_array_cmpl), &status ); auto naive_output = cpu_layer_softmax( input ); local_ok = compare_data(workload_output, naive_output); // end of local test // summary: local_timer.tock(); local_result.time_consumed = local_timer.get_time_diff(); local_result.clocks_consumed = local_timer.get_clocks_diff(); local_result.passed = local_ok; tests_results << local_result; run_ok = run_ok && local_ok; if( input ) delete input; if( workload_output ) delete workload_output; if( naive_output ) delete naive_output; if( workload ) delete workload; } // The pattern, of complex instruction above, can be multiplied // END of run tests // --------------------------------------------------------------------------------------------------------- } } catch( std::runtime_error &error ) { run_result << "error: " + std::string( error.what() ); run_ok = false; } catch( std::exception &error ) { run_result << "error: " + std::string( error.what() ); run_ok = false; } catch( ... ) { run_result << "unknown error"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if( !done() ) run_ok = false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
bool test_caffe_float_workload_cpu_time::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if(!init()) throw std::runtime_error("error: init() returns false so can't run test"); run_timer.tick(); //start time measurement run_result << std::string("run test with " + current_tested_device->get_device_description()); // --------------------------------------------------------------------------------------------------------- // TODO: here test code //{ // BKM pattern of test with time measuring: // bool local_ok=true; // test_measurement_result local_result; // local_result.description = "RUN PART: (name part) of " + test_description; // C_time_control local_timer; // // begin local test // // end of local test // // summary: // local_timer.tock(); // local_result.time_consumed = local_timer.time_diff_string(); // local_result.clocks_consumed = local_timer.get_clocks_diff(); // tests_results << local_result; //} // The pattern, of complex instruction above, can be multiplied for(uint16_t batch :{1,8,48}) { std::vector<uint64_t> time_diffs; std::vector<uint64_t> clock_diffs; nn::data<float,4> *images = new nn::data<float,4>(img_size,img_size,3,batch); nn_data_populate(nn::data_cast<float,0>(images),0.0f,255.0f); nn_data_t *input_array[1] ={images}; auto workload_output = new nn::data<float, 2>(1000, batch); nn::data<float> *output_array_cmpl[1] ={ nn::data_cast<float, 0>(workload_output) }; nn_workload_t *workload = nullptr; // compiling workload NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_ZXY_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_1D_BATCH; auto status = di->workflow_compile_function(&workload,di->device,workflow,&input_format,&output_format,batch); if(!workload) throw std::runtime_error("workload compilation failed for batch = " + std::to_string(batch) 
+ " status: " + std::to_string(status)); test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string(batch)+") execution of " + test_description; local_result.loops = loops; // begin local test for(auto i = 0; i< loops; ++i) { NN_API_STATUS status; C_time_control loop_timer; di->workload_execute_function(workload,reinterpret_cast<void**>(input_array),reinterpret_cast<void**>(output_array_cmpl),&status); loop_timer.tock(); time_diffs.push_back(loop_timer.get_time_diff()/batch); clock_diffs.push_back(loop_timer.get_clocks_diff()/batch); } // end of local test // summary: uint64_t min_value = *std::min_element(time_diffs.begin(),time_diffs.end()); local_result.time_consumed = std::accumulate(time_diffs.begin(),time_diffs.end(),0.0)/time_diffs.size(); local_result.time_consumed_min = min_value; local_result.time_consumed_max = *std::max_element(time_diffs.begin(),time_diffs.end()); local_result << std::string("note: The shortest time for one image obtained from the chrono: " + C_time_control::time_diff_string(min_value)); local_result << std::string("note: Values of time's and clock's were divided by current value of batch: "+std::to_string(batch)); local_result.clocks_consumed = std::accumulate(clock_diffs.begin(),clock_diffs.end(),0.0)/clock_diffs.size(); local_result.clocks_consumed_min = *std::min_element(clock_diffs.begin(),clock_diffs.end()); local_result.clocks_consumed_max = *std::max_element(clock_diffs.begin(),clock_diffs.end()); tests_results << local_result; if(images != nullptr) delete images; if(workload_output != nullptr) delete workload_output; if(workload != nullptr) di->workload_delete_function(workload); } // --------------------------------------------------------------------------------------------------------- run_ok = true; } catch(std::runtime_error &error) { run_result << "error: " + std::string(error.what()); run_ok = false; } catch(...) 
{ run_result << "error: unknown"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if (!done()) run_ok=false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl;; return run_ok; }
bool test_view::run() { bool run_ok = true; test_measurement_result run_result; run_result.description = "RUN SUMMARY: " + test_description; C_time_control run_timer; std::cout << "-> Testing: " << test_description << std::endl; try { if( !init() ) throw std::runtime_error( "init() returns false so can't run test" ); run_timer.tick(); //start time measurement run_result << std::string( "run test with " + current_tested_device->get_device_description() ); NN_WORKLOAD_DATA_TYPE input_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; NN_WORKLOAD_DATA_TYPE output_format = NN_WORKLOAD_DATA_TYPE_F32_3D_BATCH; std::mt19937 generator( 1 ); std::uniform_int_distribution<uint32_t> distribution( 0, 56/2 ); auto compare_data = [](nn::workload_data<nn::layout_f32>& item, nn::data<float>& ref_item) { float relative_error_threshold = 1e-3f, absolute_error_threshold = 1e-6f, absoulte_error_limit = 1e-4f; uint32_t size_n = item.get_length(0), size_x = item.get_length(1), size_y = item.get_length(2), size_z = item.get_length(3); for(uint32_t n = 0; n < size_n; ++n) for(uint32_t z = 0; z < size_z; ++z) for( uint32_t y = 0; y < size_y; ++y ) for( uint32_t x = 0; x < size_x; ++x ) { float workload_val = item.at(n, x, y, z, 0, 0); float ref_val = ref_item.at(z, x, y, n); if( fabs(workload_val) < absoulte_error_limit) { if(fabs( workload_val - ref_val ) > absolute_error_threshold) { return false; } } else if(fabs(workload_val - ref_val) / fabs(ref_val) > relative_error_threshold) return false; } return true; }; for( uint32_t batch : { 1, 8, 48 } ) { // simple sample pattern of test with time measuring: bool local_ok = true; test_measurement_result local_result; local_result.description = "RUN PART: (batch " + std::to_string( batch ) + ") execution of " + test_description; C_time_control local_timer; for(uint32_t size_x : { 5,16,56 }) { for(uint32_t size_y : { 5,16,56 }) { for(uint32_t size_z : { 1,8,16 }) { // 
--------------------------------------------------------------------------------------------------------- // begin local test auto input = new nn::data<float>(size_z,size_x,size_y,batch); if(input == nullptr) throw std::runtime_error("unable to create input nn::data for batch = " +std::to_string(batch)); nn_data_populate(input,-100.0f,100.0f); auto wrkld_data = new nn::workload_data<nn::layout_f32>(input->buffer, {batch,size_x,size_y,size_z,1,1}, nn::data_helper_layout_lookup_zxynpq<float>() ); if(wrkld_data == nullptr) { delete input; throw std::runtime_error("unable to create wrkld_data for batch = " +std::to_string(batch)); } nn_workload_data_coords_t* view_begin_coords,*view_end_coords; { // create random view view_begin_coords = new nn_workload_data_coords_t{ distribution(generator) % batch, distribution(generator) % size_x, distribution(generator) % size_y, distribution(generator) % size_z, 0, 0 }; if(view_begin_coords == nullptr) { delete input; delete wrkld_data; throw std::runtime_error("unable to create view_begin_coords for batch = " +std::to_string(batch)); } view_end_coords = new nn_workload_data_coords_t{ distribution(generator) % batch, distribution(generator) % size_x, distribution(generator) % size_y, distribution(generator) % size_z, 0, 0 }; if(view_end_coords == nullptr) { delete input; delete wrkld_data; delete view_begin_coords; throw std::runtime_error("unable to create view_end_coords for batch = " +std::to_string(batch)); } for(int i = 0 ; i <= 4 ; ++i) if(view_begin_coords->t[i] > view_end_coords->t[i]) { std::swap(view_begin_coords->t[i],view_end_coords->t[i]); } } // create view auto workload_output = new nn::workload_data<nn::layout_f32>(*wrkld_data,*view_begin_coords,*view_end_coords); if(workload_output == nullptr) { delete input; delete wrkld_data; delete view_begin_coords; delete view_end_coords; delete workload_output; throw std::runtime_error("unable to create workload_output nn::workload_data for batch = " +std::to_string(batch)); 
} // naive view auto naive_output = naive_view(*input,*view_begin_coords,*view_end_coords); local_ok = compare_data(*workload_output,*naive_output); if(input) delete input; if(workload_output) delete workload_output; if(naive_output) delete naive_output; if(view_begin_coords)delete view_begin_coords; if(view_end_coords) delete view_end_coords; if(wrkld_data) delete wrkld_data; // END of run tests // --------------------------------------------------------------------------------------------------------- } // The pattern, of complex instruction above, can be multiplied } } // end of local test // summary: local_timer.tock(); local_result.time_consumed = local_timer.get_time_diff(); local_result.clocks_consumed = local_timer.get_clocks_diff(); local_result.passed = local_ok; tests_results << local_result; run_ok = run_ok && local_ok; } } catch(std::runtime_error &error) { run_result << "error: " + std::string(error.what()); run_ok = false; } catch(std::exception &error) { run_result << "error: " + std::string(error.what()); run_ok = false; } catch(...) { run_result << "unknown error"; run_ok = false; } run_timer.tock(); run_result.time_consumed = run_timer.get_time_diff(); run_result.clocks_consumed = run_timer.get_clocks_diff(); run_result.passed = run_ok; tests_results << run_result; if(!done()) run_ok = false; std::cout << "<- Test " << (run_ok ? "passed" : "failed") << std::endl; return run_ok; }