void network_data_initializer::initialize(
		layer_data_list& data_list,
		const network_schema& schema)
	{
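		// Rescale the initial weights of convolution layers feeding into layers
		// that change the variance of the signal flowing through the network.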
		std::vector<layer::const_ptr> layer_list = schema.get_layers();
		for(std::size_t i = 0; i < layer_list.size(); ++i)
		{
			float weight_multiplier = 1.0F;
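			// ReLU zeroes out about half of its inputs, halving the variance;
			// compensate with a sqrt(2) gain (He et al. initialization).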
			if (layer_list[i]->get_type_name() == rectified_linear_layer::layer_type_name)
			{
				weight_multiplier *= sqrtf(2.0F);
			}
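			// For PReLU the corresponding gain is sqrt(2 / (1 + a^2)), with a taken
			// as the average of the layer's learned negative slopes.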
			if (layer_list[i]->get_type_name() == parametric_rectified_linear_layer::layer_type_name)
			{
				layer_data::ptr data = data_list.find(layer_list[i]->instance_name);
				float a = std::accumulate(data->at(0).begin(), data->at(0).end(), 0.0F) / static_cast<float>(data->at(0).size());
				weight_multiplier *= sqrtf(2.0F / (1.0F + a * a));
			}
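			// An add layer scales the sum of its inputs by alpha; compensate for
			// both the number of inputs and the alpha factor.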
			if (layer_list[i]->get_type_name() == add_layer::layer_type_name)
			{
				nnforge_shared_ptr<const add_layer> layer_derived = nnforge_dynamic_pointer_cast<const add_layer>(layer_list[i]);
				weight_multiplier *= 1.0F / std::max(static_cast<int>(layer_list[i]->input_layer_instance_names.size()), 1) / layer_derived->alpha;
			}

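			// Push the accumulated multiplier back into the weights of each
			// (sparse) convolution layer feeding this one.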
			if ((weight_multiplier != 1.0F) && (!layer_list[i]->input_layer_instance_names.empty()))
			{
				for(std::vector<std::string>::const_iterator it = layer_list[i]->input_layer_instance_names.begin(); it != layer_list[i]->input_layer_instance_names.end(); ++it)
				{
					layer::const_ptr previous_layer = schema.get_layer(*it);
					if ((previous_layer->get_type_name() == convolution_layer::layer_type_name) || (previous_layer->get_type_name() == sparse_convolution_layer::layer_type_name))
					{
						layer_data::ptr data = data_list.find(previous_layer->instance_name);
						std::vector<float>::iterator weights_begin = data->at(0).begin();
						std::vector<float>::iterator weights_end = data->at(0).end();
						for(std::vector<float>::iterator weight_it = weights_begin; weight_it != weights_end; ++weight_it)
							*weight_it *= weight_multiplier;
					}
				}
			}
		}
	}
	void validate_progress_network_data_pusher::push(
		const training_task_state& task_state,
		const network_schema& schema)
	{
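		// Run a validation pass every report_frequency epochs.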
		if ((task_state.get_current_epoch() % report_frequency) == 0)
		{
			forward_prop->set_data(*task_state.data);

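			// Collect every layer's output value set while running forward propagation on the validation data.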
			neuron_value_set_data_bunch_writer writer;
			forward_propagation::stat st = forward_prop->run(*reader, writer);

			forward_prop->clear_data();

			std::cout << "----- Validating -----" << std::endl;
			std::cout << st << std::endl;

			for(std::map<std::string, std::pair<layer_configuration_specific, neuron_value_set::ptr> >::const_iterator it = writer.layer_name_to_config_and_value_set_map.begin(); it != writer.layer_name_to_config_and_value_set_map.end(); ++it)
				std::cout << schema.get_layer(it->first)->get_string_for_average_data(it->second.first, *it->second.second->get_average()) << std::endl;
		}
	}
	void report_progress_network_data_pusher::push(
		const training_task_state& task_state,
		const network_schema& schema)
	{
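		// Print statistics for the most recent training epoch.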
		unsigned int last_index = static_cast<unsigned int>(task_state.history.size()) - 1;

		std::cout << "----- Training -----" << std::endl;
		std::cout << task_state.history[last_index].first << std::endl;
		if (!task_state.comments[last_index].empty())
			std::cout << task_state.comments[last_index] << std::endl;

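		// For each weight part print [average update / average weight, average |weight|, average |update|].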
		std::cout << "Avg [rate weights updates]";
		std::vector<std::string> data_name_list = task_state.data->data_list.get_data_layer_name_list();
		for(std::vector<std::string>::const_iterator it = data_name_list.begin(); it != data_name_list.end(); ++it)
		{
			layer_data::ptr layer_data = task_state.data->data_list.get(*it);
			if (!layer_data->empty())
			{
				std::cout << ", " << *it;
				const std::vector<float>& absolute_updates = task_state.history[last_index].first.average_absolute_updates.find(*it)->second;
				for(std::size_t part_id = 0; part_id < layer_data->size(); ++part_id)
				{
					const std::vector<float>& weights = layer_data->at(part_id);
					double sum = 0.0;
					for(std::vector<float>::const_iterator weight_it = weights.begin(); weight_it != weights.end(); ++weight_it)
						sum += static_cast<double>(fabsf(*weight_it));
					float avg_weight = static_cast<float>(sum / static_cast<double>(weights.size()));

					std::cout << (boost::format(" [%|1$.2e| %|2$.2e| %|3$.2e|]") % (absolute_updates[part_id] / avg_weight) % avg_weight % absolute_updates[part_id]);
				}
			}
		}
		std::cout << std::endl;

		for(std::map<std::string, std::pair<layer_configuration_specific, nnforge_shared_ptr<std::vector<float> > > >::const_iterator it = task_state.history[last_index].second.begin(); it != task_state.history[last_index].second.end(); ++it)
			std::cout << schema.get_layer(it->first)->get_string_for_average_data(it->second.first, *it->second.second) << std::endl;
	}
	backward_propagation::backward_propagation(
		const network_schema& schema,
		const std::vector<std::string>& output_layer_names,
		const std::vector<std::string>& error_source_layer_names,
		const std::vector<std::string>& exclude_data_update_layer_names,
		debug_state::ptr debug,
		profile_state::ptr profile)
		: output_layer_names(output_layer_names)
		, error_source_layer_names(error_source_layer_names)
		, exclude_data_update_layer_names(exclude_data_update_layer_names)
		, debug(debug)
		, profile(profile)
	{
		if (error_source_layer_names.empty())
			throw neural_network_exception("No error source layers specified for backward_propagation");

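		// Keep only the layers required to compute the requested outputs and
		// to propagate errors from the error source layers.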
		this->schema = network_schema::const_ptr(new network_schema(schema.get_required_layers(
			output_layer_names,
			error_source_layer_names,
			exclude_data_update_layer_names)));
		if (debug->is_debug())
		{
			boost::filesystem::ofstream out(debug->get_path_to_unique_file("backward_prop_schema_reduced", "gv"), std::ios_base::out | std::ios_base::trunc);
			this->schema->write_gv(out);
		}

		cumulative_tiling_factor_map = this->schema->get_cumulative_tiling_factor_map();

		action_schema = this->schema->get_actions_for_backward_propagation(
			output_layer_names,
			error_source_layer_names,
			exclude_data_update_layer_names,
			same_output_action_sets);
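		// Within each set of actions sharing one output buffer, flag every action
		// except the last one as an add-output action.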
		for(std::vector<std::vector<layer_name_with_action> >::const_iterator it = same_output_action_sets.begin(); it != same_output_action_sets.end(); ++it)
		{
			const std::vector<layer_name_with_action>& same_output_actions = *it;
			for(std::vector<layer_name_with_action>::const_iterator it2 = same_output_actions.begin(); it2 != same_output_actions.end() - 1; ++it2)
				add_output_actions.insert(*it2);
			if (debug->is_debug())
			{
				std::stringstream s;
				s << "Same output for actions: ";
				for(std::vector<layer_name_with_action>::const_iterator it2 = same_output_actions.begin(); it2 != same_output_actions.end(); ++it2)
				{
					if (it2 != same_output_actions.begin())
						s << ", ";
					s << it2->get_name() << " " << it2->get_action().str();
				}
				debug->output_message(s.str().c_str());
			}
		}
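		// Dump the action schema as a GraphViz file with actions color-coded by type.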
		if (debug->is_debug())
		{
			std::vector<layer_name_with_action> actions = action_schema->get_actions();
			std::map<layer_name_with_action, unsigned int> layer_name_with_action_color_map;
			for(std::vector<layer_name_with_action>::const_iterator it = actions.begin(); it != actions.end(); ++it)
			{
				unsigned int color_id;
				switch (it->get_action().get_action_type())
				{
				case layer_action::forward:
					color_id = 0;
					break;
				case layer_action::backward_data:
					color_id = 1;
					break;
				case layer_action::backward_weights:
					color_id = 2;
					break;
				case layer_action::backward_data_and_weights:
					color_id = 3;
					break;
				case layer_action::update_weights:
					color_id = 4;
					break;
				default:
					color_id = 5;
					break;
				}
				layer_name_with_action_color_map.insert(std::make_pair(*it, color_id));
			}

			boost::filesystem::ofstream out(debug->get_path_to_unique_file("backward_prop_action_schema", "gv"), std::ios_base::out | std::ios_base::trunc);
			this->action_schema->write_gv(out, layer_name_with_action_color_map);
		}

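		// All output layers must agree on a single cumulative tiling factor.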
		output_layers_tiling_factor = 1;
		for(std::vector<std::string>::const_iterator it = output_layer_names.begin(); it != output_layer_names.end(); ++it)
		{
			if (it == output_layer_names.begin())
				output_layers_tiling_factor = cumulative_tiling_factor_map[*it];
			else if (output_layers_tiling_factor != cumulative_tiling_factor_map[*it])
				throw neural_network_exception((boost::format("Inconsistent tiling factors across output layers: %1% and %2%") % output_layers_tiling_factor % cumulative_tiling_factor_map[*it]).str());
		}

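		// Collect the names of all data layers in the reduced schema.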
		std::vector<layer::const_ptr> data_layers = this->schema->get_data_layers();
		for(std::vector<layer::const_ptr>::const_iterator it = data_layers.begin(); it != data_layers.end(); ++it)
			data_layer_names.insert((*it)->instance_name);
	}