float linear_sampler_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: { layer_configuration_specific output_config = get_output_layer_configuration_specific(input_configuration_specific_list); unsigned int neuron_count_per_feature_map = output_config.get_neuron_count_per_feature_map(); unsigned int per_feature_map_work = 7; unsigned int constant_work = 12; return static_cast<float>(neuron_count_per_feature_map) * static_cast<float>(constant_work + per_feature_map_work * output_config.feature_map_count); } case layer_action::backward_data: { layer_configuration_specific output_config = get_output_layer_configuration_specific(input_configuration_specific_list); unsigned int neuron_count_per_feature_map = output_config.get_neuron_count_per_feature_map(); unsigned int per_feature_map_work = 8; unsigned int constant_work = 18; return static_cast<float>(neuron_count_per_feature_map) * static_cast<float>(constant_work + per_feature_map_work * output_config.feature_map_count); } case layer_action::backward_weights: default: return 0.0F; } }
// Per-entry scratch space: backward-data needs one input entry worth of floats
// (for the first input); every other action needs no per-entry working buffer.
size_t convolution_layer_updater_cuda::get_temporary_working_per_entry_buffer_size(const layer_action& action) const
{
	if (action.get_action_type() != layer_action::backward_data)
		return 0;

	return input_elem_count_per_entry_list[0] * sizeof(float);
}
float negative_log_likelihood_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: { unsigned int neuron_count = get_output_layer_configuration_specific(input_configuration_specific_list).get_neuron_count(); unsigned int per_item_flops = input_configuration_specific_list[0].feature_map_count * 3; return static_cast<float>(neuron_count) * static_cast<float>(per_item_flops); } case layer_action::backward_data: { unsigned int neuron_count = input_configuration_specific_list[action.get_backprop_index()].get_neuron_count(); unsigned int per_item_flops = 2; return static_cast<float>(neuron_count) * static_cast<float>(per_item_flops); } default: return 0.0F; } }
size_t softmax_layer_updater_plain::get_temporary_working_fixed_buffer_size( const layer_action& action, const std::set<layer_action>& actions, plain_running_configuration::const_ptr plain_config, layer::const_ptr layer_schema, const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_configuration_specific& output_configuration_specific) const { if (action.get_action_type() == layer_action::forward) { return plain_config->openmp_thread_count * output_configuration_specific.feature_map_count * sizeof(float); } else return 0; }
// Estimates the per-entry flop cost; every action is a fixed number of flops per input neuron.
float parametric_rectified_linear_layer::get_flops_per_entry(
	const std::vector<layer_configuration_specific>& input_configuration_specific_list,
	const layer_action& action) const
{
	unsigned int flops_per_neuron;
	switch (action.get_action_type())
	{
	case layer_action::forward:
	case layer_action::backward_data:
		flops_per_neuron = 2;
		break;
	case layer_action::backward_weights:
		flops_per_neuron = 3;
		break;
	default:
		return 0.0F;
	}
	return static_cast<float>(input_configuration_specific_list[0].get_neuron_count() * flops_per_neuron);
}
float accuracy_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: { unsigned int neuron_count = get_output_layer_configuration_specific(input_configuration_specific_list).get_neuron_count(); unsigned int per_item_flops = input_configuration_specific_list[0].feature_map_count * 2; return static_cast<float>(neuron_count) * static_cast<float>(per_item_flops); } case layer_action::backward_data: throw neural_network_exception("get_backward_flops is not implemented for accuracy_layer"); default: return 0.0F; } }
float upsampling_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: return 0.0F; case layer_action::backward_data: { unsigned int neuron_count = input_configuration_specific_list[0].get_neuron_count(); unsigned int per_item_flops = feature_map_upsampling_size; std::for_each(upsampling_sizes.begin(), upsampling_sizes.end(), per_item_flops *= boost::lambda::_1); return static_cast<float>(neuron_count) * static_cast<float>(per_item_flops); } default: return 0.0F; } }
float rgb_to_yuv_convert_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: { unsigned int neuron_count = input_configuration_specific_list[0].get_neuron_count_per_feature_map() * static_cast<unsigned int>(color_feature_map_config_list.size()); return static_cast<float>(neuron_count * 9); } case layer_action::backward_data: { unsigned int neuron_count = input_configuration_specific_list[0].get_neuron_count_per_feature_map() * static_cast<unsigned int>(color_feature_map_config_list.size()); return static_cast<float>(neuron_count * 9); } default: return 0.0F; } }
float max_subsampling_layer::get_flops_per_entry( const std::vector<layer_configuration_specific>& input_configuration_specific_list, const layer_action& action) const { switch (action.get_action_type()) { case layer_action::forward: { unsigned int neuron_count = get_output_layer_configuration_specific(input_configuration_specific_list).get_neuron_count(); unsigned int per_item_flops = feature_map_subsampling_size * entry_subsampling_size; std::for_each(subsampling_sizes.begin(), subsampling_sizes.end(), [&per_item_flops] (unsigned int x) { per_item_flops *= x; }); per_item_flops -= 1; return static_cast<float>(neuron_count) * static_cast<float>(per_item_flops); } case layer_action::backward_data: return 0.0F; default: return 0.0F; } }
std::pair<size_t, bool> convolution_layer_updater_cuda::get_temporary_working_fixed_buffer_size(const layer_action& action) const { bool is_over_sol_algos_available = cudnn_util::is_over_sol_algos_available(window_sizes, strides, dilation); switch (action.get_action_type()) { case layer_action::forward: case layer_action::backward_data: { unsigned int working_buffer_elem_count = std::max(input_configuration_specific_list[0].feature_map_count, output_configuration_specific.feature_map_count); for(int i = 0; i < window_sizes.size(); ++i) working_buffer_elem_count *= window_sizes[i]; return std::make_pair(std::max(working_buffer_elem_count * sizeof(int), (size_t)(1024*1024)), is_over_sol_algos_available); } case layer_action::backward_weights: { return std::make_pair(update_weights_find_algo_working_buffer_size + update_weights_working_buffer_size, is_over_sol_algos_available); } default: return std::make_pair(0, false); } }
int layer_updater_cuda::get_input_index_layer_can_write(const layer_action& action) const { if (actions.find(action) == actions.end()) throw neural_network_exception((boost::format("get_input_index_layer_can_write called for layer %1% for action %2% while it is not configured to run such an action") % layer_schema->instance_name % action.str()).str()); return -1; }
size_t layer_updater_cuda::get_temporary_working_per_entry_buffer_size(const layer_action& action) const { if (actions.find(action) == actions.end()) throw neural_network_exception((boost::format("get_temporary_working_per_entry_buffer_size called for layer %1% for action %2% while it is not configured to run such an action") % layer_schema->instance_name % action.str()).str()); return 0; }