Exemplo n.º 1
0
// Reloads the classification class info table from the working data folder.
//
// Both lookup maps (wnid_to_ilsvrc2014id_map and ilsvrc2014id_to_wnid_map) are cleared and then
// refilled from the file named by cls_class_info_filename. The first line of the file is skipped
// as a header. Every following line must contain exactly 4 tab-separated fields: field 0 is
// converted to an integer with atol and used as the WNID, field 1 is stored as the matching
// ILSVRC2014 ID string; fields 2 and 3 are not used here. Reading stops at the first empty line
// or at the end of the file.
//
// Throws std::runtime_error if a line does not have 4 fields, or if no entry was loaded at all
// (this is also the outcome when the file cannot be opened, since nothing can be read from it).
void imagenet_toolset::load_cls_class_info()
{
	wnid_to_ilsvrc2014id_map.clear();
	ilsvrc2014id_to_wnid_map.clear();

	boost::filesystem::path cls_class_info_filepath = get_working_data_folder() / cls_class_info_filename;
	boost::filesystem::ifstream file_input(cls_class_info_filepath, std::ios_base::in);

	std::string str;
	std::getline(file_input, str); // Skip header
	int line_number = 1; // The header is line 1, so the first data line is reported as line 2

	// The loop is driven by the stream state rather than by the content of str: once the stream
	// has hit end-of-file, std::getline leaves str untouched, so testing only str.empty() would
	// re-process the last line forever when the file does not end with a newline.
	while (std::getline(file_input, str))
	{
		++line_number;
		if (str.empty())
			break; // An empty line terminates the table

		std::vector<std::string> strs;
		boost::split(strs, str, boost::is_any_of("\t"));

		if (strs.size() != 4)
			throw std::runtime_error((boost::format("Wrong number of fields in line %1%: %2%") % line_number % str).str());

		int wnid = atol(strs[0].c_str());

		// Keep the forward and the reverse mapping in sync
		wnid_to_ilsvrc2014id_map.insert(std::make_pair(wnid, strs[1]));
		ilsvrc2014id_to_wnid_map.insert(std::make_pair(strs[1], wnid));
	}

	if (wnid_to_ilsvrc2014id_map.empty())
		throw std::runtime_error((boost::format("No class info loaded from %1%") % cls_class_info_filepath.string()).str());
}
Exemplo n.º 2
0
void imagenet_toolset::prepare_validating_data()
{
	std::vector<unsigned int> classid_list;
	{
		boost::filesystem::path validating_class_labels_filepath = get_input_data_folder() / devkit_folder_name / devkit_data_folder_name / validation_ground_truth_file_name;
		std::cout << "Reading ground truth labels from " + validating_class_labels_filepath.string() << "..." << std::endl;

		boost::filesystem::ifstream file_input(validating_class_labels_filepath, std::ios_base::in);

		std::string str;
		while (true)
		{
			std::getline(file_input, str);
			if (str.empty())
				break;

			unsigned int wnid = atol(str.c_str());
			unsigned int classid = get_classid_by_wnid(wnid);
			classid_list.push_back(classid);
		}
	}
	std::cout << classid_list.size() << " labels read\n";

	nnforge::varying_data_stream_writer_smart_ptr validating_data_writer;
	{
		boost::filesystem::path validating_file_path = get_working_data_folder() / validating_data_filename;
		std::cout << "Writing validating data to " << validating_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> validating_file(new boost::filesystem::ofstream(validating_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		validating_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			validating_file,
			static_cast<unsigned int>(classid_list.size())));
	}

	boost::filesystem::path validating_images_folder_path = get_input_data_folder() / validating_images_folder_name;
	for(int i = 0; i < classid_list.size(); ++i)
	{
		unsigned int class_id = classid_list[i];
		unsigned int image_id = i + 1;
		boost::filesystem::path image_file_path = validating_images_folder_path / (boost::format("ILSVRC2012_val_%|1$08d|.JPEG") % image_id).str();

		write_supervised_data(image_file_path, *validating_data_writer, class_id);
	}
	std::cout << classid_list.size() << " entries written" << std::endl;
}
Exemplo n.º 3
0
void gtsrb_toolset::prepare_training_data()
{
	{
		boost::filesystem::path file_path = get_working_data_folder() / training_data_filename;
		std::cout << "Writing data to " << file_path.string() << std::endl;

		std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		nnforge::layer_configuration_specific input_configuration;
		input_configuration.feature_map_count = is_color ? 3 : 1;
		input_configuration.dimension_sizes.push_back(image_width);
		input_configuration.dimension_sizes.push_back(image_height);
		nnforge::layer_configuration_specific output_configuration;
		output_configuration.feature_map_count = class_count;
		output_configuration.dimension_sizes.push_back(1);
		output_configuration.dimension_sizes.push_back(1);
		nnforge::supervised_data_stream_writer writer(
			file_with_data,
			input_configuration,
			output_configuration);

		for(unsigned int folder_id = 0; folder_id < class_count; ++folder_id)
		{
			boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Training") / "Images" / (boost::format("%|1$05d|") % folder_id).str();
			std::string annotation_file_name = (boost::format("GT-%|1$05d|.csv") % folder_id).str();

			write_folder(
				writer,
				subfolder_name,
				annotation_file_name.c_str(),
				true);
		}
	}
	
	{
		boost::filesystem::path file_path = get_working_data_folder() / validating_data_filename;
		std::cout << "Writing data to " << file_path.string() << std::endl;

		std::tr1::shared_ptr<std::ofstream> file_with_data(new boost::filesystem::ofstream(file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		nnforge::layer_configuration_specific input_configuration;
		input_configuration.feature_map_count = is_color ? 3 : 1;
		input_configuration.dimension_sizes.push_back(image_width);
		input_configuration.dimension_sizes.push_back(image_height);
		nnforge::layer_configuration_specific output_configuration;
		output_configuration.feature_map_count = class_count;
		output_configuration.dimension_sizes.push_back(1);
		output_configuration.dimension_sizes.push_back(1);
		nnforge::supervised_data_stream_writer writer(
			file_with_data,
			input_configuration,
			output_configuration);

		boost::filesystem::path subfolder_name = boost::filesystem::path("Final_Test") / "Images";
		std::string annotation_file_name = "GT-final_test.csv";

		write_folder(
			writer,
			subfolder_name,
			annotation_file_name.c_str(),
			false);
	}
}
Exemplo n.º 4
0
// Writes all training images to training_randomized_data_filename in a randomized order that
// keeps the classes balanced over the course of the file.
//
// Phase 1 enumerates the class sub-folders of the training images folder and collects, per
// ILSVRC2014 ID (the folder name), the list of local image IDs parsed from the file names.
// Phase 2 repeatedly selects a class among those with the highest fraction of still-unwritten
// images, draws one of its remaining images at random and writes it, until every enumerated
// image has been written.
//
// Throws std::runtime_error if a selected class unexpectedly has no images left.
void imagenet_toolset::prepare_randomized_training_data()
{
	boost::filesystem::path training_images_folder_path = get_input_data_folder() / training_images_folder_name;
	unsigned int total_training_image_count = 0;
	std::cout << "Enumerating training images from " + training_images_folder_path.string() << "..." << std::endl;
	// ILSVRC2014 ID (folder name) -> local IDs of the images not yet written
	std::map<std::string, std::vector<unsigned int> > ilsvrc2014id_to_localid_list_map;
	{
		nnforge_regex folder_expression(ilsvrc2014id_pattern);
		nnforge_regex file_expression(training_image_filename_pattern);
		nnforge_cmatch what;
		for(boost::filesystem::directory_iterator it = boost::filesystem::directory_iterator(training_images_folder_path); it != boost::filesystem::directory_iterator(); ++it)
		{
			// Only directories whose name matches the ILSVRC2014 ID pattern are treated as classes
			if (it->status().type() == boost::filesystem::directory_file)
			{
				boost::filesystem::path folder_path = it->path();
				std::string folder_name = folder_path.filename().string();
				if (nnforge_regex_match(folder_name, folder_expression))
				{
					const std::string& ilsvrc2014id = folder_name;
					// NOTE(review): class_id is never read; the lookup presumably serves to reject
					// folders with an unknown ID early - confirm against the lookup helpers.
					unsigned int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id));
					// insert() returns the existing entry if the ID is already present, so the list is created once per ID
					std::vector<unsigned int>& localid_list =  ilsvrc2014id_to_localid_list_map.insert(std::make_pair(ilsvrc2014id, std::vector<unsigned int>())).first->second;
					for(boost::filesystem::directory_iterator it2 = boost::filesystem::directory_iterator(folder_path); it2 != boost::filesystem::directory_iterator(); ++it2)
					{
						if (it2->status().type() == boost::filesystem::regular_file)
						{
							boost::filesystem::path file_path = it2->path();
							std::string file_name = file_path.filename().string();
							// 'what' refers into file_name's buffer, so file_name must outlive its use below
							if (nnforge_regex_search(file_name.c_str(), what, file_expression))
							{
								// Capture group 1 is extracted but not used afterwards
								std::string ilsvrc2014id2 = std::string(what[1].first, what[1].second);
								// Capture group 2 is parsed as the numeric local image ID
								int localid = atol(std::string(what[2].first, what[2].second).c_str());
								localid_list.push_back(localid);
								++total_training_image_count;
							}
						}
					}
				}
			}
		}
	}
	std::cout << total_training_image_count << " training images found\n";
	// ILSVRC2014 ID -> (initial image count, fraction of images still unwritten);
	// the fraction starts at 1 for non-empty classes and at 0 for empty ones
	std::map<std::string, std::pair<unsigned int, float> > ilsvrc2014id_to_localid_count_and_remaining_ratio_map;
	for(std::map<std::string, std::vector<unsigned int> >::iterator it = ilsvrc2014id_to_localid_list_map.begin(); it != ilsvrc2014id_to_localid_list_map.end(); ++it)
		ilsvrc2014id_to_localid_count_and_remaining_ratio_map.insert(std::make_pair(it->first, std::make_pair(it->second.size(), it->second.size() > 0 ? 1.0F : 0.0F)));
	nnforge::random_generator rnd;

	nnforge::varying_data_stream_writer_smart_ptr training_data_writer;
	{
		boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
		std::cout << "Writing randomized training data to " << training_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> training_file(new boost::filesystem::ofstream(training_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		training_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			training_file,
			total_training_image_count));
	}

	// Candidate classes for the next picks; refilled only once it has been fully consumed
	std::vector<std::string> best_ilsvrc2014id_list;
	for(unsigned int entry_to_write_count = 0; entry_to_write_count < total_training_image_count; ++entry_to_write_count)
	{
		if (best_ilsvrc2014id_list.empty())
		{
			// Collect every class whose remaining ratio equals the current maximum
			float best_ratio = -1.0F;
			for(std::map<std::string, std::pair<unsigned int, float> >::const_iterator it = ilsvrc2014id_to_localid_count_and_remaining_ratio_map.begin(); it != ilsvrc2014id_to_localid_count_and_remaining_ratio_map.end(); ++it)
			{
				float new_ratio = it->second.second;
				if (new_ratio > best_ratio)
				{
					// Strictly better ratio found: restart the candidate list
					best_ilsvrc2014id_list.clear();
					best_ilsvrc2014id_list.push_back(it->first);
					best_ratio = new_ratio;
				}
				else if (new_ratio == best_ratio)
					best_ilsvrc2014id_list.push_back(it->first);
			}
		}

		// Draw a random candidate class and remove it from the list (swap with last, then pop)
		std::string best_ilsvrc2014id;
		{
			nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(best_ilsvrc2014id_list.size()) - 1);
			unsigned int index = dist(rnd);
			best_ilsvrc2014id = best_ilsvrc2014id_list[index];
			best_ilsvrc2014id_list[index] = best_ilsvrc2014id_list.back();
			best_ilsvrc2014id_list.pop_back();
		}

		std::map<std::string, std::vector<unsigned int> >::iterator bucket_it = ilsvrc2014id_to_localid_list_map.find(best_ilsvrc2014id);
		std::vector<unsigned int>& localid_list = bucket_it->second;
		if (localid_list.empty())
			throw std::runtime_error("Unexpected error in prepare_training_data: No elements left");

		nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(localid_list.size()) - 1);

		// Draw a random remaining image of the class and remove it (swap with last, then pop)
		unsigned int index = dist(rnd);
		unsigned int local_id = localid_list[index];
		unsigned int leftover_local_id = localid_list[localid_list.size() - 1];
		localid_list[index] = leftover_local_id;
		localid_list.pop_back();
		// Refresh the remaining ratio of the class: images left / initial image count
		std::map<std::string, std::pair<unsigned int, float> >::iterator it = ilsvrc2014id_to_localid_count_and_remaining_ratio_map.find(best_ilsvrc2014id);
		it->second.second = static_cast<float>(localid_list.size()) / static_cast<float>(it->second.first);

		// The image path is rebuilt as <images folder>/<ID>/<ID>_<local id>.JPEG
		std::string filename = (boost::format("%1%_%2%.JPEG") % best_ilsvrc2014id % local_id).str();
		boost::filesystem::path image_file_path = training_images_folder_path / best_ilsvrc2014id / filename;
		int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(best_ilsvrc2014id));

		write_supervised_data(image_file_path, *training_data_writer, class_id);

		// Progress report every 100000 entries
		if (((entry_to_write_count + 1) % 100000) == 0)
			std::cout << (entry_to_write_count + 1) << " entries written" << std::endl;
	}
	std::cout << total_training_image_count << " entries written" << std::endl;
}
Exemplo n.º 5
0
// Returns the validators of the base toolset plus one extra validate_progress_network_data_pusher.
//
// The extra pusher reads the validating data file (opened in binary mode) through
// get_validating_reader(stream, true) and then through three stacked input transformers, in this
// order: flip_2d_data_sampler_transformer(1), convert_data_type_transformer, and the transformer
// returned by get_input_data_normalize_transformer().
std::vector<nnforge::network_data_pusher_smart_ptr> imagenet_toolset::get_validators_for_training(nnforge::network_schema_smart_ptr schema)
{
	std::vector<nnforge::network_data_pusher_smart_ptr> validators = neural_network_toolset::get_validators_for_training(schema);

	boost::filesystem::path validating_file_path = get_working_data_folder() / validating_data_filename;
	nnforge_shared_ptr<std::istream> input_stream(new boost::filesystem::ifstream(validating_file_path, std::ios_base::in | std::ios_base::binary));

	// Each step wraps the reader built so far into a new transforming reader
	nnforge::supervised_data_reader_smart_ptr reader = get_validating_reader(input_stream, true);

	nnforge::data_transformer_smart_ptr flip_transformer(new nnforge::flip_2d_data_sampler_transformer(1));
	reader = nnforge::supervised_data_reader_smart_ptr(new nnforge::supervised_transformed_input_data_reader(reader, flip_transformer));

	nnforge::data_transformer_smart_ptr convert_transformer(new nnforge::convert_data_type_transformer());
	reader = nnforge::supervised_data_reader_smart_ptr(new nnforge::supervised_transformed_input_data_reader(reader, convert_transformer));

	reader = nnforge::supervised_data_reader_smart_ptr(new nnforge::supervised_transformed_input_data_reader(reader, get_input_data_normalize_transformer()));

	nnforge::network_data_pusher_smart_ptr progress_validator(new nnforge::validate_progress_network_data_pusher(
		tester_factory->create(schema),
		reader,
		get_validating_visualizer(),
		get_error_function(),
		reader->get_sample_count(),
		enrich_validation_report_frequency));
	validators.push_back(progress_validator);

	return validators;
}
Exemplo n.º 6
0
// Opens the validating data file from the working data folder in binary mode and returns the
// reader produced by get_validating_reader for it, passing rich_inference as the second argument.
nnforge::supervised_data_reader_smart_ptr imagenet_toolset::get_initial_data_reader_for_validating() const
{
	boost::filesystem::path validating_file_path = get_working_data_folder() / validating_data_filename;
	nnforge_shared_ptr<std::istream> input_stream(new boost::filesystem::ifstream(validating_file_path, std::ios_base::in | std::ios_base::binary));

	return get_validating_reader(input_stream, rich_inference);
}
Exemplo n.º 7
0
// Opens the randomized training data file from the working data folder in binary mode and
// returns a supervised_random_image_data_stream_reader over it, configured with the original and
// target training image sizes and class_count. force_deterministic is forwarded unchanged as
// the last constructor argument; the argument before it is always true.
nnforge::supervised_data_reader_smart_ptr imagenet_toolset::get_initial_data_reader_for_training(bool force_deterministic) const
{
	boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
	nnforge_shared_ptr<std::istream> input_stream(new boost::filesystem::ifstream(training_file_path, std::ios_base::in | std::ios_base::binary));

	return nnforge::supervised_data_reader_smart_ptr(new nnforge::supervised_random_image_data_stream_reader(
		input_stream,
		training_image_original_width,
		training_image_original_height,
		training_image_width,
		training_image_height,
		class_count,
		true,
		force_deterministic));
}
Exemplo n.º 8
0
// Opens the randomized training data file from the working data folder in binary mode, builds a
// supervised_random_image_data_stream_reader over it (both trailing boolean arguments set to
// true) and returns that reader wrapped with a convert_data_type_transformer.
nnforge::supervised_data_reader_smart_ptr imagenet_toolset::get_initial_data_reader_for_normalizing() const
{
	boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
	nnforge_shared_ptr<std::istream> input_stream(new boost::filesystem::ifstream(training_file_path, std::ios_base::in | std::ios_base::binary));

	nnforge::supervised_data_reader_smart_ptr image_reader(new nnforge::supervised_random_image_data_stream_reader(
		input_stream,
		training_image_original_width,
		training_image_original_height,
		training_image_width,
		training_image_height,
		class_count,
		true,
		true));

	nnforge::data_transformer_smart_ptr convert_transformer(new nnforge::convert_data_type_transformer());
	return nnforge::supervised_data_reader_smart_ptr(new nnforge::supervised_transformed_input_data_reader(image_reader, convert_transformer));
}
Exemplo n.º 9
0
void imagenet_toolset::prepare_true_randomized_training_data()
{
	boost::filesystem::path training_images_folder_path = get_input_data_folder() / training_images_folder_name;
	std::cout << "Enumerating training images from " + training_images_folder_path.string() << "..." << std::endl;
	std::vector<std::pair<std::string, unsigned int> > ilsvrc2014id_localid_pair_list;
	{
		nnforge_regex folder_expression(ilsvrc2014id_pattern);
		nnforge_regex file_expression(training_image_filename_pattern);
		nnforge_cmatch what;
		for(boost::filesystem::directory_iterator it = boost::filesystem::directory_iterator(training_images_folder_path); it != boost::filesystem::directory_iterator(); ++it)
		{
			if (it->status().type() == boost::filesystem::directory_file)
			{
				boost::filesystem::path folder_path = it->path();
				std::string folder_name = folder_path.filename().string();
				if (nnforge_regex_match(folder_name, folder_expression))
				{
					const std::string& ilsvrc2014id = folder_name;
					unsigned int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id));
					for(boost::filesystem::directory_iterator it2 = boost::filesystem::directory_iterator(folder_path); it2 != boost::filesystem::directory_iterator(); ++it2)
					{
						if (it2->status().type() == boost::filesystem::regular_file)
						{
							boost::filesystem::path file_path = it2->path();
							std::string file_name = file_path.filename().string();
							if (nnforge_regex_search(file_name.c_str(), what, file_expression))
							{
								int localid = atol(std::string(what[2].first, what[2].second).c_str());
								ilsvrc2014id_localid_pair_list.push_back(std::make_pair(ilsvrc2014id, localid));
							}
						}
					}
				}
			}
		}
	}
	unsigned int total_training_image_count = static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size());
	std::cout << "Training images found: " << total_training_image_count << std::endl;

	nnforge::random_generator rnd;

	nnforge::varying_data_stream_writer_smart_ptr training_data_writer;
	{
		boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
		std::cout << "Writing randomized training data to " << training_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> training_file(new boost::filesystem::ofstream(training_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		training_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			training_file,
			static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size())));
	}

	for(unsigned int entry_written_count = 0; entry_written_count < total_training_image_count; ++entry_written_count)
	{
		nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size()) - 1);

		unsigned int index = dist(rnd);
		std::pair<std::string, unsigned int> ilsvrc2014id_localid_pair = ilsvrc2014id_localid_pair_list[index];
		ilsvrc2014id_localid_pair_list[index] = ilsvrc2014id_localid_pair_list[ilsvrc2014id_localid_pair_list.size() - 1];
		ilsvrc2014id_localid_pair_list.pop_back();

		std::string filename = (boost::format("%1%_%2%.JPEG") % ilsvrc2014id_localid_pair.first % ilsvrc2014id_localid_pair.second).str();
		boost::filesystem::path image_file_path = training_images_folder_path / ilsvrc2014id_localid_pair.first / filename;
		int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id_localid_pair.first));

		write_supervised_data(image_file_path, *training_data_writer, class_id);

		if (((entry_written_count + 1) % 100000) == 0)
			std::cout << (entry_written_count + 1) << " entries written" << std::endl;
	}
	std::cout << total_training_image_count << " entries written" << std::endl;
}