void network_data_initializer::initialize(
	layer_data_list& data_list,
	const network_schema& schema)
{
	std::vector<layer::const_ptr> layer_list = schema.get_layers();
	for(std::size_t i = 0; i < layer_list.size(); ++i)
	{
		float weight_multiplier = 1.0F;

		// He initialization: a ReLU halves the variance of a zero-mean input,
		// so scale the incoming weights by sqrt(2) to compensate.
		if (layer_list[i]->get_type_name() == rectified_linear_layer::layer_type_name)
		{
			weight_multiplier *= sqrtf(2.0F);
		}

		// PReLU generalization: with average negative slope a, the compensating
		// factor becomes sqrt(2 / (1 + a^2)).
		if (layer_list[i]->get_type_name() == parametric_rectified_linear_layer::layer_type_name)
		{
			layer_data::ptr data = data_list.find(layer_list[i]->instance_name);
			float a = std::accumulate(data->at(0).begin(), data->at(0).end(), 0.0F) / static_cast<float>(data->at(0).size());
			weight_multiplier *= sqrtf(2.0F / (1.0F + a * a));
		}

		// An add layer sums its inputs and scales by alpha; divide by both the
		// input count and alpha so the combined signal keeps its magnitude.
		if (layer_list[i]->get_type_name() == add_layer::layer_type_name)
		{
			nnforge_shared_ptr<const add_layer> layer_derived = nnforge_dynamic_pointer_cast<const add_layer>(layer_list[i]);
			weight_multiplier *= 1.0F / static_cast<float>(std::max(static_cast<int>(layer_list[i]->input_layer_instance_names.size()), 1)) / layer_derived->alpha;
		}

		// Apply the accumulated multiplier to the weights of any (sparse)
		// convolution layers feeding this one.
		if ((weight_multiplier != 1.0F) && (!layer_list[i]->input_layer_instance_names.empty()))
		{
			for(std::vector<std::string>::const_iterator it = layer_list[i]->input_layer_instance_names.begin(); it != layer_list[i]->input_layer_instance_names.end(); ++it)
			{
				layer::const_ptr previous_layer = schema.get_layer(*it);
				if ((previous_layer->get_type_name() == convolution_layer::layer_type_name)
					|| (previous_layer->get_type_name() == sparse_convolution_layer::layer_type_name))
				{
					layer_data::ptr data = data_list.find(previous_layer->instance_name);
					for(std::vector<float>::iterator weight_it = data->at(0).begin(); weight_it != data->at(0).end(); ++weight_it)
						*weight_it *= weight_multiplier;
				}
			}
		}
	}
}
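// Hedged sketch (not nnForge code): the variance-scaling rule behind the
// multipliers above. For a zero-mean symmetric input x, E[relu(x)^2] =
// Var(x)/2, so scaling the fan-in weights by sqrt(2) restores unit variance
// (He et al.); for a PReLU with negative slope a the factor generalizes to
// sqrt(2/(1+a^2)), which reduces to sqrt(2) at a = 0 (plain ReLU) and to 1
// at a = 1 (identity). The helper below is illustrative only.
#include <cmath>

inline float variance_preserving_multiplier(float negative_slope)
{
	// negative_slope = 0 -> sqrt(2); negative_slope = 1 -> 1 (linear layer)
	return std::sqrt(2.0F / (1.0F + negative_slope * negative_slope));
}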
void validate_progress_network_data_pusher::push(
	const training_task_state& task_state,
	const network_schema& schema)
{
	// Only run validation every report_frequency epochs.
	if ((task_state.get_current_epoch() % report_frequency) == 0)
	{
		forward_prop->set_data(*task_state.data);

		neuron_value_set_data_bunch_writer writer;
		forward_propagation::stat st = forward_prop->run(*reader, writer);

		forward_prop->clear_data();

		std::cout << "----- Validating -----" << std::endl;
		std::cout << st << std::endl;

		// Print the per-layer averages collected by the writer.
		for(std::map<std::string, std::pair<layer_configuration_specific, neuron_value_set::ptr> >::const_iterator it = writer.layer_name_to_config_and_value_set_map.begin(); it != writer.layer_name_to_config_and_value_set_map.end(); ++it)
			std::cout << schema.get_layer(it->first)->get_string_for_average_data(it->second.first, *it->second.second->get_average()) << std::endl;
	}
}
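// Hedged sketch (illustrative names only): the epoch gate above in isolation.
// With report_frequency = 5, validation runs at epochs 0, 5, 10, ..., so the
// cost of a full forward pass over the validation set is paid only on a
// fraction of the epochs.
inline bool should_validate(unsigned int epoch, unsigned int report_frequency)
{
	return (epoch % report_frequency) == 0; // same predicate as the pusher above
}
// e.g. should_validate(10, 5) == true, should_validate(7, 5) == false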
void report_progress_network_data_pusher::push(
	const training_task_state& task_state,
	const network_schema& schema)
{
	unsigned int last_index = static_cast<unsigned int>(task_state.history.size()) - 1;

	std::cout << "----- Training -----" << std::endl;
	std::cout << task_state.history[last_index].first << std::endl;

	if (!task_state.comments[last_index].empty())
		std::cout << task_state.comments[last_index] << std::endl;

	// For each layer with trainable data, print the relative update rate,
	// the average absolute weight, and the average absolute update.
	std::cout << "Avg [rate weights updates]";
	std::vector<std::string> data_name_list = task_state.data->data_list.get_data_layer_name_list();
	for(std::vector<std::string>::const_iterator it = data_name_list.begin(); it != data_name_list.end(); ++it)
	{
		layer_data::ptr data = task_state.data->data_list.get(*it);
		if (!data->empty())
		{
			std::cout << ", " << *it;
			const std::vector<float>& absolute_updates = task_state.history[last_index].first.average_absolute_updates.find(*it)->second;
			for(std::size_t part_id = 0; part_id < data->size(); ++part_id)
			{
				const std::vector<float>& weights = data->at(part_id);
				double sum = 0.0;
				for(std::vector<float>::const_iterator weight_it = weights.begin(); weight_it != weights.end(); ++weight_it)
					sum += static_cast<double>(fabsf(*weight_it));
				float avg_weight = static_cast<float>(sum / static_cast<double>(weights.size()));
				std::cout << (boost::format(" [%|1$.2e| %|2$.2e| %|3$.2e|]") % (absolute_updates[part_id] / avg_weight) % avg_weight % absolute_updates[part_id]);
			}
		}
	}
	std::cout << std::endl;

	// Print the per-layer averages recorded for this epoch.
	for(std::map<std::string, std::pair<layer_configuration_specific, nnforge_shared_ptr<std::vector<float> > > >::const_iterator it = task_state.history[last_index].second.begin(); it != task_state.history[last_index].second.end(); ++it)
		std::cout << schema.get_layer(it->first)->get_string_for_average_data(it->second.first, *it->second.second) << std::endl;
}
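// Hedged sketch (not nnForge code): the statistic behind each bracketed triple
// printed above. The first number is average |update| divided by average
// |weight|, a rough relative step size often watched when tuning the learning
// rate; the helper below recomputes the denominator standalone.
#include <cmath>
#include <cstddef>
#include <vector>

inline float average_absolute(const std::vector<float>& values)
{
	if (values.empty())
		return 0.0F;
	double sum = 0.0; // accumulate in double, as the reporter above does
	for (std::size_t i = 0; i < values.size(); ++i)
		sum += std::fabs(static_cast<double>(values[i]));
	return static_cast<float>(sum / static_cast<double>(values.size()));
}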
backward_propagation::backward_propagation(
	const network_schema& schema,
	const std::vector<std::string>& output_layer_names,
	const std::vector<std::string>& error_source_layer_names,
	const std::vector<std::string>& exclude_data_update_layer_names,
	debug_state::ptr debug,
	profile_state::ptr profile)
	: output_layer_names(output_layer_names)
	, error_source_layer_names(error_source_layer_names)
	, exclude_data_update_layer_names(exclude_data_update_layer_names)
	, debug(debug)
	, profile(profile)
{
	if (error_source_layer_names.empty())
		throw neural_network_exception("No error source layers specified for backward_propagation");

	// Reduce the schema to the subgraph actually needed to produce the
	// requested outputs and propagate errors from the given sources.
	this->schema = network_schema::const_ptr(new network_schema(schema.get_required_layers(
		output_layer_names,
		error_source_layer_names,
		exclude_data_update_layer_names)));

	if (debug->is_debug())
	{
		boost::filesystem::ofstream out(debug->get_path_to_unique_file("backward_prop_schema_reduced", "gv"), std::ios_base::out | std::ios_base::trunc);
		this->schema->write_gv(out);
	}

	cumulative_tiling_factor_map = this->schema->get_cumulative_tiling_factor_map();

	action_schema = this->schema->get_actions_for_backward_propagation(
		output_layer_names,
		error_source_layer_names,
		exclude_data_update_layer_names,
		same_output_action_sets);

	// Actions within one set write to the same buffer; mark all but the last
	// action in each set so their results accumulate rather than overwrite.
	for(std::vector<std::vector<layer_name_with_action> >::const_iterator it = same_output_action_sets.begin(); it != same_output_action_sets.end(); ++it)
	{
		const std::vector<layer_name_with_action>& same_output_actions = *it;
		for(std::vector<layer_name_with_action>::const_iterator it2 = same_output_actions.begin(); it2 != same_output_actions.end() - 1; ++it2)
			add_output_actions.insert(*it2);

		if (debug->is_debug())
		{
			std::stringstream s;
			s << "Same output for actions: ";
			for(std::vector<layer_name_with_action>::const_iterator it2 = same_output_actions.begin(); it2 != same_output_actions.end(); ++it2)
			{
				if (it2 != same_output_actions.begin())
					s << ", ";
				s << it2->get_name() << " " << it2->get_action().str();
			}
			debug->output_message(s.str().c_str());
		}
	}

	if (debug->is_debug())
	{
		// Color each action by its type in the dumped graphviz action schema.
		std::vector<layer_name_with_action> actions = action_schema->get_actions();
		std::map<layer_name_with_action, unsigned int> layer_name_with_action_color_map;
		for(std::vector<layer_name_with_action>::const_iterator it = actions.begin(); it != actions.end(); ++it)
		{
			unsigned int color_id;
			switch (it->get_action().get_action_type())
			{
			case layer_action::forward:
				color_id = 0;
				break;
			case layer_action::backward_data:
				color_id = 1;
				break;
			case layer_action::backward_weights:
				color_id = 2;
				break;
			case layer_action::backward_data_and_weights:
				color_id = 3;
				break;
			case layer_action::update_weights:
				color_id = 4;
				break;
			default:
				color_id = 5;
				break;
			}
			layer_name_with_action_color_map.insert(std::make_pair(*it, color_id));
		}
		boost::filesystem::ofstream out(debug->get_path_to_unique_file("backward_prop_action_schema", "gv"), std::ios_base::out | std::ios_base::trunc);
		this->action_schema->write_gv(out, layer_name_with_action_color_map);
	}

	// All output layers must share a single cumulative tiling factor.
	output_layers_tiling_factor = 1;
	for(std::vector<std::string>::const_iterator it = output_layer_names.begin(); it != output_layer_names.end(); ++it)
	{
		if (it == output_layer_names.begin())
			output_layers_tiling_factor = cumulative_tiling_factor_map[*it];
		else if (output_layers_tiling_factor != cumulative_tiling_factor_map[*it])
			throw neural_network_exception((boost::format("Inconsistent tiling factors across output layers: %1% and %2%") % output_layers_tiling_factor % cumulative_tiling_factor_map[*it]).str());
	}

	std::vector<layer::const_ptr> data_layers = this->schema->get_data_layers();
	for(std::vector<layer::const_ptr>::const_iterator it = data_layers.begin(); it != data_layers.end(); ++it)
		data_layer_names.insert((*it)->instance_name);
}
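// Hedged sketch (standard containers in place of nnForge types): the
// tiling-factor consistency rule enforced by the constructor above. The
// cumulative factor determines how many internal entries correspond to one
// input entry, so mixed factors across output layers cannot be reconciled;
// the function below returns the common factor or throws.
#include <cstddef>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

inline int common_tiling_factor(
	const std::vector<std::string>& output_names,
	const std::map<std::string, int>& tiling_factor_map)
{
	int factor = 1;
	for (std::size_t i = 0; i < output_names.size(); ++i)
	{
		std::map<std::string, int>::const_iterator it = tiling_factor_map.find(output_names[i]);
		int current = (it == tiling_factor_map.end()) ? 1 : it->second;
		if (i == 0)
			factor = current; // first output defines the expected factor
		else if (factor != current)
			throw std::runtime_error("Inconsistent tiling factors across output layers");
	}
	return factor;
}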