// Computes the arithmetic mean of every parameter block of the hessian.
//
// hessian - network data that is iterated layer by layer; layers reporting empty() are skipped,
//           so the result holds one entry per non-empty layer only.
// history - not used by this function; kept so the signature stays unchanged.
//
// Returns one vector per non-empty layer, holding one mean per block of that layer
// (sum of the block's elements divided by the block's element count).
// The division is not guarded: a block with zero elements produces 0.0F / 0.
std::vector<std::vector<float> > network_trainer_sdlm::get_average_hessian_list(
		network_data_smart_ptr hessian,
		const std::vector<testing_result_smart_ptr>& history) const
	{
		std::vector<std::vector<float> > res;

		for(network_data::iterator it = hessian->begin(); it != hessian->end(); ++it)
		{
			if ((*it)->empty())
				continue;

			std::vector<float> hs_list;
			for(layer_data::iterator it2 = (*it)->begin(); it2 != (*it)->end(); ++it2)
			{
				float sum = std::accumulate(it2->begin(), it2->end(), 0.0F);
				hs_list.push_back(sum / it2->size());
			}
			res.push_back(hs_list);
		}

		return res;
	}
	// Converts the hessian IN PLACE into per-weight training values using a single global mu/eta pair,
	// and returns a human-readable summary of the values used.
	//
	// hessian              - network data whose blocks are overwritten with the output of hessian_transform(mu, eta).
	// average_hessian_list - per-layer lists of per-block averages; only their global minimum and maximum are used here.
	// history              - only its size is used: as the exponent of mu_increase_factor and as the argument
	//                        of get_tail_decay_factor.
	//
	// mu  = min(min_hessian * 0.5 * mu_increase_factor ^ history.size(), min(max_mu, max_hessian * 0.5))
	// eta = mu * speed * get_tail_decay_factor(history.size())
	//
	// Returns "Eta = <eta>, Mu = <mu>, LR (<averages>)", where the averages are the per-block means of the
	// transformed values: blocks of a layer are separated by " ", layers by ", ".
	// If average_hessian_list holds no values at all, min_hessian/max_hessian keep their sentinel values.
	std::string network_trainer_sdlm::convert_hessian_to_training_vector(
		network_data_smart_ptr hessian,
		const std::vector<std::vector<float> >& average_hessian_list,
		const std::vector<testing_result_smart_ptr>& history) const
	{
		float min_hessian = std::numeric_limits<float>::max();
		// numeric_limits<float>::min() is the smallest POSITIVE normalized float, not the most negative one,
		// so it must not be used as the starting point of a running maximum
		float max_hessian = -std::numeric_limits<float>::max();
		for(std::vector<std::vector<float> >::const_iterator it = average_hessian_list.begin(); it != average_hessian_list.end(); ++it)
		{
			const std::vector<float>& avl = *it;
			std::vector<float>::const_iterator it_max = std::max_element(avl.begin(), avl.end());
			if (it_max != avl.end())
				max_hessian = std::max(max_hessian, *it_max);
			std::vector<float>::const_iterator it_min = std::min_element(avl.begin(), avl.end());
			if (it_min != avl.end())
				min_hessian = std::min(min_hessian, *it_min);
		}
		float max_mu_current = std::min(max_mu, max_hessian * 0.5F);
		float mu = min_hessian * 0.5F * powf(mu_increase_factor, static_cast<float>(history.size()));
		mu = std::min(mu, max_mu_current);

		float eta = mu * speed * get_tail_decay_factor(static_cast<unsigned int>(history.size()));

		// mu and eta are fixed for the whole network, so a single functor serves every block
		// (std::transform takes it by value)
		hessian_transform ht(mu, eta);

		std::vector<std::vector<float> > avg_lr_lists;
		for(network_data::iterator it = hessian->begin(); it != hessian->end(); ++it)
		{
			if ((*it)->size() > 0)
			{
				std::vector<float> avg_lr_list;
				for(layer_data::iterator it2 = (*it)->begin(); it2 != (*it)->end(); ++it2)
				{
					// Overwrite the block with the transformed values
					std::transform(it2->begin(), it2->end(), it2->begin(), ht);

					// Mean of the transformed block, reported in the summary string
					float sum = std::accumulate(it2->begin(), it2->end(), 0.0F);
					float new_vg_lr = sum / it2->size();
					avg_lr_list.push_back(new_vg_lr);
				}
				avg_lr_lists.push_back(avg_lr_list);
			}
		}

		std::string average_lr_str;
		for(std::vector<std::vector<float> >::const_iterator it = avg_lr_lists.begin(); it != avg_lr_lists.end(); ++it)
		{
			if (it != avg_lr_lists.begin())
				average_lr_str += ", ";

			for(std::vector<float>::const_iterator it2 = it->begin(); it2 != it->end(); ++it2)
			{
				if (it2 != it->begin())
					average_lr_str += " ";
				average_lr_str += (boost::format("%|1$.1e|") % *it2).str();
			}
		}

		return (boost::format("Eta = %|1$.2e|, Mu = %|2$.2e|, LR (%|3$s|)") % eta % mu % average_lr_str).str();
	}
// Example #3
// 0
	// Validates the inputs, refreshes the pool of random numbers and then delegates the
	// actual work to actual_update, whose result is returned unchanged.
	//
	// reader                    - supplies the input configuration (forwarded to set_input_configuration_specific)
	//                             and the output configuration (compared against the last layer configuration).
	// learning_rates, data, batch_size, weight_decay, momentum, layer_to_dropout_rate_map
	//                           - forwarded to actual_update as they are; data is additionally checked against the schema.
	std::pair<testing_result_smart_ptr, training_stat_smart_ptr> network_updater::update(
		supervised_data_reader& reader,
		const std::vector<std::vector<float> >& learning_rates,
		network_data_smart_ptr data,
		unsigned int batch_size,
		float weight_decay,
		float momentum,
		const std::map<unsigned int, float>& layer_to_dropout_rate_map)
	{
		// The network data has to be consistent with the schema of this updater
		data->check_network_data_consistency(*schema);

		set_input_configuration_specific(reader.get_input_configuration());

		// The configuration of the last layer has to equal the reader's output configuration
		layer_config_list[layer_config_list.size() - 1].check_equality(reader.get_output_configuration());

		// Overwrite every entry of random_uniform_list with a fresh draw from a
		// distribution constructed with the bounds 0 and 1
		nnforge_uniform_real_distribution<float> uniform_dist(0.0F, 1.0F);
		std::vector<float>::iterator random_it = random_uniform_list.begin();
		while (random_it != random_uniform_list.end())
		{
			*random_it = uniform_dist(gen);
			++random_it;
		}

		return actual_update(reader, learning_rates, data, batch_size, weight_decay, momentum, layer_to_dropout_rate_map);
	}
	// Checks the supplied network data against the analyzer's schema and then
	// hands it over to actual_set_data.
	//
	// data - network data to use; check_network_data_consistency is called on it first,
	//        so actual_set_data only runs if that call returns normally.
	void network_analyzer::set_data(network_data_smart_ptr data)
	{
		// Check data-schema consistency
		data->check_network_data_consistency(*schema);

		actual_set_data(data);
	}
	// Converts the hessian IN PLACE into per-weight training values, using a separate mu for every block:
	// the mu of a block is the matching entry of average_hessian_list, and
	// eta = mu * speed * get_tail_decay_factor(history.size()).
	//
	// hessian              - network data whose blocks are overwritten with the output of hessian_transform(mu, eta).
	// average_hessian_list - walked in lockstep with the non-empty layers of hessian and their blocks.
	//                        No bounds are checked, so it needs one list per non-empty layer and one
	//                        value per block of that layer.
	// history              - only its size is used, as the argument of get_tail_decay_factor.
	//
	// Returns "LR (<averages>)", where the averages are the per-block means of the transformed values:
	// blocks of a layer are separated by " ", layers by ", ".
	std::string network_trainer_sdlm::convert_hessian_to_training_vector_per_layer_mu(
		network_data_smart_ptr hessian,
		const std::vector<std::vector<float> >& average_hessian_list,
		const std::vector<testing_result_smart_ptr>& history) const
	{
		std::vector<std::vector<float> > layer_lr_averages;
		std::vector<std::vector<float> >::const_iterator layer_avg_it = average_hessian_list.begin();
		for(network_data::iterator layer_it = hessian->begin(); layer_it != hessian->end(); layer_it++)
		{
			// Layers without blocks consume no entry of average_hessian_list
			if ((*layer_it)->size() == 0)
				continue;

			std::vector<float> block_lr_averages;
			std::vector<float>::const_iterator block_avg_it = layer_avg_it->begin();
			for(layer_data::iterator block_it = (*layer_it)->begin(); block_it != (*layer_it)->end(); block_it++, block_avg_it++)
			{
				float mu = *block_avg_it;
				float eta = mu * speed * get_tail_decay_factor(static_cast<unsigned int>(history.size()));
				hessian_transform to_training_value(mu, eta);
				// Overwrite the block with the transformed values
				std::transform(block_it->begin(), block_it->end(), block_it->begin(), to_training_value);

				// Mean of the transformed block, reported in the summary string
				float total = std::accumulate(block_it->begin(), block_it->end(), 0.0F);
				block_lr_averages.push_back(total / block_it->size());
			}
			layer_lr_averages.push_back(block_lr_averages);
			++layer_avg_it;
		}

		std::string lr_text;
		for(std::vector<std::vector<float> >::const_iterator row_it = layer_lr_averages.begin(); row_it != layer_lr_averages.end(); row_it++)
		{
			if (row_it != layer_lr_averages.begin())
				lr_text += ", ";

			for(std::vector<float>::const_iterator value_it = row_it->begin(); value_it != row_it->end(); value_it++)
			{
				if (value_it != row_it->begin())
					lr_text += " ";
				lr_text += (boost::format("%|1$.1e|") % *value_it).str();
			}
		}

		return (boost::format("LR (%|1$s|)") % lr_text).str();
	}
	// Writes every non-empty block of the hessian to its own text file, one value per line.
	//
	// hessian         - network data to dump; iterated layer by layer, block by block.
	// filename_prefix - leading part of every file name. The full name is
	//                   "<prefix>_<layer index>_<block index>.txt", both indices zero-padded to two digits.
	//
	// Values are streamed with the default float formatting of std::ofstream.
	// The result of opening the file is not checked.
	void network_trainer_sdlm::dump_lists(
		network_data_smart_ptr hessian,
		const char * filename_prefix) const
	{
		for(network_data::const_iterator it = hessian->begin(); it != hessian->end(); ++it)
		{
			for(layer_data::const_iterator it2 = (*it)->begin(); it2 != (*it)->end(); ++it2)
			{
				if (!it2->empty())
				{
					std::string filename = (boost::format("%1%_%|2$02d|_%|3$02d|.txt") % filename_prefix % (it - hessian->begin()) % (it2 - (*it)->begin())).str();
					std::ofstream out(filename.c_str());
					// '\n' instead of std::endl: same bytes in the file, but no flush after every single value;
					// the stream is flushed once when it is destroyed at the end of this scope
					for(std::vector<float>::const_iterator it3 = it2->begin(); it3 != it2->end(); ++it3)
						out << *it3 << '\n';
				}
			}
		}
	}
	// Prepares the calculator for the reader's input configuration, validates the network
	// data against the schema and then delegates to actual_get_hessian.
	//
	// reader                        - supplies the input configuration and is forwarded to actual_get_hessian.
	// data                          - network data; checked for consistency with the schema before being forwarded.
	// hessian_entry_to_process_count - forwarded to actual_get_hessian unchanged.
	//
	// Returns whatever actual_get_hessian returns.
	network_data_smart_ptr hessian_calculator::get_hessian(
		unsupervised_data_reader& reader,
		network_data_smart_ptr data,
		unsigned int hessian_entry_to_process_count)
	{
		// Input configuration is applied first, before the consistency check below
		set_input_configuration_specific(reader.get_input_configuration());

		// Check data-schema consistency
		data->check_network_data_consistency(*schema);

		return actual_get_hessian(
			reader,
			data,
			hessian_entry_to_process_count);
	}
	// Stores the network data under folder_path / filename.
	//
	// The data is first written to "<filename>.temp" in the same folder and that file is then
	// renamed to the final name, so the final name is only touched by the rename.
	//
	// data     - network data; its write() is called with the opened binary stream.
	// filename - name of the target file, relative to folder_path.
	//
	// NOTE(review): the state of the stream is not inspected before the rename - confirm whether
	// a failed write should prevent the rename.
	void save_resume_network_data_pusher::save_data_to_file(
		network_data_smart_ptr data,
		std::string filename) const
	{
		std::string staging_name = filename + ".temp";

		boost::filesystem::path final_path = folder_path / filename;
		boost::filesystem::path staging_path = folder_path / staging_name;

		// Inner scope: the stream is destroyed (and thereby closed) before the rename below
		{
			boost::filesystem::ofstream staging_stream(staging_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc);
			data->write(staging_stream);
		}

		boost::filesystem::rename(staging_path, final_path);
	}