Пример #1
0
// Builds the validating data file from the ground truth label list and the validation images.
//
// Step 1: reads one numeric label per line from the devkit ground truth file and maps each
//         through get_classid_by_wnid(). Reading stops at end of file or at the first empty line.
// Step 2: opens (truncating) the validating data file and creates a writer sized for the label count.
// Step 3: for the i-th label (0-based) writes the image named ILSVRC2012_val_<i+1 as 8 zero-padded digits>.JPEG
//         from the validating images folder together with its class id.
void imagenet_toolset::prepare_validating_data()
{
	std::vector<unsigned int> classid_list;
	{
		boost::filesystem::path validating_class_labels_filepath = get_input_data_folder() / devkit_folder_name / devkit_data_folder_name / validation_ground_truth_file_name;
		std::cout << "Reading ground truth labels from " + validating_class_labels_filepath.string() << "..." << std::endl;

		boost::filesystem::ifstream file_input(validating_class_labels_filepath, std::ios_base::in);

		std::string str;
		// The stream state must drive the loop: a std::getline that fails on an already
		// exhausted stream leaves str holding the previous line, so a file without a
		// trailing newline would otherwise repeat its last label forever.
		while (std::getline(file_input, str))
		{
			// An empty line terminates the list (same as before).
			if (str.empty())
				break;

			unsigned int wnid = static_cast<unsigned int>(atol(str.c_str()));
			unsigned int classid = get_classid_by_wnid(wnid);
			classid_list.push_back(classid);
		}
	}
	std::cout << classid_list.size() << " labels read\n";

	const unsigned int label_count = static_cast<unsigned int>(classid_list.size());

	nnforge::varying_data_stream_writer_smart_ptr validating_data_writer;
	{
		boost::filesystem::path validating_file_path = get_working_data_folder() / validating_data_filename;
		std::cout << "Writing validating data to " << validating_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> validating_file(new boost::filesystem::ofstream(validating_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		validating_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			validating_file,
			label_count));
	}

	boost::filesystem::path validating_images_folder_path = get_input_data_folder() / validating_images_folder_name;
	// Unsigned index: avoids the signed/unsigned comparison against the label count.
	for(unsigned int i = 0; i < label_count; ++i)
	{
		unsigned int class_id = classid_list[i];
		// Image file names are numbered starting from 1.
		unsigned int image_id = i + 1;
		boost::filesystem::path image_file_path = validating_images_folder_path / (boost::format("ILSVRC2012_val_%|1$08d|.JPEG") % image_id).str();

		write_supervised_data(image_file_path, *validating_data_writer, class_id);
	}
	std::cout << classid_list.size() << " entries written" << std::endl;
}
Пример #2
0
// Reads the annotation file of one subfolder and writes an entry for every annotated image.
//
// writer                  - destination for the produced entries.
// relative_subfolder_path - folder, relative to the input data folder, that holds the images
//                           and the annotation file.
// annotation_file_name    - name of the ';'-separated annotation file; its first line is
//                           skipped as a header.
// jitter                  - when true, every image is written random_sample_count times, each
//                           time with freshly drawn rotation, scale, shift, contrast and
//                           brightness values; when false it is written once without them.
//
// Each record must split into exactly 8 fields: field 0 is the image file name, fields 3-6
// are forwarded as the top-left / bottom-right coordinates and field 7 as the class id.
// Reading stops at end of file or at the first line that does not have 8 fields.
void gtsrb_toolset::write_folder(
	nnforge::supervised_data_stream_writer& writer,
	const boost::filesystem::path& relative_subfolder_path,
	const char * annotation_file_name,
	bool jitter)
{
	boost::filesystem::path subfolder_path = get_input_data_folder() / relative_subfolder_path;
	boost::filesystem::path annotation_file_path = subfolder_path / annotation_file_name;

	std::cout << "Reading input data from " << subfolder_path.string() << std::endl;

	boost::filesystem::ifstream file_input(annotation_file_path, std::ios_base::in);

	nnforge::random_generator generator = nnforge::rnd::get_random_generator();
	std::tr1::uniform_real<float> rotate_angle_distribution(-max_rotation_angle_in_degrees, max_rotation_angle_in_degrees);
	std::tr1::uniform_real<float> scale_distribution(1.0F / max_scale_factor, max_scale_factor);
	std::tr1::uniform_real<float> shift_distribution(-max_shift, max_shift);
	std::tr1::uniform_real<float> contrast_distribution(1.0F / max_contrast_factor, max_contrast_factor);
	std::tr1::uniform_real<float> brightness_shift_distribution(-max_brightness_shift, max_brightness_shift);

	std::string str;
	std::getline(file_input, str); // read the header

	// The stream state must drive the loop: a std::getline that fails on an already exhausted
	// stream leaves str holding the previous record, so a file whose last record has no
	// trailing newline would otherwise be re-parsed and written forever.
	while (std::getline(file_input, str))
	{
		std::vector<std::string> strs;
		boost::split(strs, str, boost::is_any_of(";"));

		// Anything other than a full 8-field record ends the data (e.g. a trailing blank line).
		if (strs.size() != 8)
			break;

		std::string file_name = strs[0];
		boost::filesystem::path absolute_file_path = subfolder_path / file_name;

		char* end;
		unsigned int top_left_x = static_cast<unsigned int>(strtol(strs[3].c_str(), &end, 10));
		unsigned int top_left_y = static_cast<unsigned int>(strtol(strs[4].c_str(), &end, 10));
		unsigned int bottom_right_x = static_cast<unsigned int>(strtol(strs[5].c_str(), &end, 10));
		unsigned int bottom_right_y = static_cast<unsigned int>(strtol(strs[6].c_str(), &end, 10));
		unsigned int class_id = static_cast<unsigned int>(strtol(strs[7].c_str(), &end, 10));

		if (jitter)
		{
			for(int i = 0; i < random_sample_count; ++i)
			{
				// Draw order is fixed so that a given generator state yields the same samples.
				float rotation_angle = rotate_angle_distribution(generator);
				float scale = scale_distribution(generator);
				float shift_x = shift_distribution(generator);
				float shift_y = shift_distribution(generator);
				float contrast = contrast_distribution(generator);
				float brightness_shift = brightness_shift_distribution(generator);
				write_single_entry(
					writer,
					absolute_file_path,
					class_id,
					top_left_x,
					top_left_y,
					bottom_right_x,
					bottom_right_y,
					rotation_angle,
					scale,
					shift_x,
					shift_y,
					contrast,
					brightness_shift);
			}
		}
		else
		{
			write_single_entry(
				writer,
				absolute_file_path,
				class_id,
				top_left_x,
				top_left_y,
				bottom_right_x,
				bottom_right_y);
		}
	}
}
Пример #3
0
void imagenet_toolset::prepare_randomized_training_data()
{
	boost::filesystem::path training_images_folder_path = get_input_data_folder() / training_images_folder_name;
	unsigned int total_training_image_count = 0;
	std::cout << "Enumerating training images from " + training_images_folder_path.string() << "..." << std::endl;
	std::map<std::string, std::vector<unsigned int> > ilsvrc2014id_to_localid_list_map;
	{
		nnforge_regex folder_expression(ilsvrc2014id_pattern);
		nnforge_regex file_expression(training_image_filename_pattern);
		nnforge_cmatch what;
		for(boost::filesystem::directory_iterator it = boost::filesystem::directory_iterator(training_images_folder_path); it != boost::filesystem::directory_iterator(); ++it)
		{
			if (it->status().type() == boost::filesystem::directory_file)
			{
				boost::filesystem::path folder_path = it->path();
				std::string folder_name = folder_path.filename().string();
				if (nnforge_regex_match(folder_name, folder_expression))
				{
					const std::string& ilsvrc2014id = folder_name;
					unsigned int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id));
					std::vector<unsigned int>& localid_list =  ilsvrc2014id_to_localid_list_map.insert(std::make_pair(ilsvrc2014id, std::vector<unsigned int>())).first->second;
					for(boost::filesystem::directory_iterator it2 = boost::filesystem::directory_iterator(folder_path); it2 != boost::filesystem::directory_iterator(); ++it2)
					{
						if (it2->status().type() == boost::filesystem::regular_file)
						{
							boost::filesystem::path file_path = it2->path();
							std::string file_name = file_path.filename().string();
							if (nnforge_regex_search(file_name.c_str(), what, file_expression))
							{
								std::string ilsvrc2014id2 = std::string(what[1].first, what[1].second);
								int localid = atol(std::string(what[2].first, what[2].second).c_str());
								localid_list.push_back(localid);
								++total_training_image_count;
							}
						}
					}
				}
			}
		}
	}
	std::cout << total_training_image_count << " training images found\n";
	std::map<std::string, std::pair<unsigned int, float> > ilsvrc2014id_to_localid_count_and_remaining_ratio_map;
	for(std::map<std::string, std::vector<unsigned int> >::iterator it = ilsvrc2014id_to_localid_list_map.begin(); it != ilsvrc2014id_to_localid_list_map.end(); ++it)
		ilsvrc2014id_to_localid_count_and_remaining_ratio_map.insert(std::make_pair(it->first, std::make_pair(it->second.size(), it->second.size() > 0 ? 1.0F : 0.0F)));
	nnforge::random_generator rnd;

	nnforge::varying_data_stream_writer_smart_ptr training_data_writer;
	{
		boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
		std::cout << "Writing randomized training data to " << training_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> training_file(new boost::filesystem::ofstream(training_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		training_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			training_file,
			total_training_image_count));
	}

	std::vector<std::string> best_ilsvrc2014id_list;
	for(unsigned int entry_to_write_count = 0; entry_to_write_count < total_training_image_count; ++entry_to_write_count)
	{
		if (best_ilsvrc2014id_list.empty())
		{
			float best_ratio = -1.0F;
			for(std::map<std::string, std::pair<unsigned int, float> >::const_iterator it = ilsvrc2014id_to_localid_count_and_remaining_ratio_map.begin(); it != ilsvrc2014id_to_localid_count_and_remaining_ratio_map.end(); ++it)
			{
				float new_ratio = it->second.second;
				if (new_ratio > best_ratio)
				{
					best_ilsvrc2014id_list.clear();
					best_ilsvrc2014id_list.push_back(it->first);
					best_ratio = new_ratio;
				}
				else if (new_ratio == best_ratio)
					best_ilsvrc2014id_list.push_back(it->first);
			}
		}

		std::string best_ilsvrc2014id;
		{
			nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(best_ilsvrc2014id_list.size()) - 1);
			unsigned int index = dist(rnd);
			best_ilsvrc2014id = best_ilsvrc2014id_list[index];
			best_ilsvrc2014id_list[index] = best_ilsvrc2014id_list.back();
			best_ilsvrc2014id_list.pop_back();
		}

		std::map<std::string, std::vector<unsigned int> >::iterator bucket_it = ilsvrc2014id_to_localid_list_map.find(best_ilsvrc2014id);
		std::vector<unsigned int>& localid_list = bucket_it->second;
		if (localid_list.empty())
			throw std::runtime_error("Unexpected error in prepare_training_data: No elements left");

		nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(localid_list.size()) - 1);

		unsigned int index = dist(rnd);
		unsigned int local_id = localid_list[index];
		unsigned int leftover_local_id = localid_list[localid_list.size() - 1];
		localid_list[index] = leftover_local_id;
		localid_list.pop_back();
		std::map<std::string, std::pair<unsigned int, float> >::iterator it = ilsvrc2014id_to_localid_count_and_remaining_ratio_map.find(best_ilsvrc2014id);
		it->second.second = static_cast<float>(localid_list.size()) / static_cast<float>(it->second.first);

		std::string filename = (boost::format("%1%_%2%.JPEG") % best_ilsvrc2014id % local_id).str();
		boost::filesystem::path image_file_path = training_images_folder_path / best_ilsvrc2014id / filename;
		int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(best_ilsvrc2014id));

		write_supervised_data(image_file_path, *training_data_writer, class_id);

		if (((entry_to_write_count + 1) % 100000) == 0)
			std::cout << (entry_to_write_count + 1) << " entries written" << std::endl;
	}
	std::cout << total_training_image_count << " entries written" << std::endl;
}
Пример #4
0
void imagenet_toolset::prepare_true_randomized_training_data()
{
	boost::filesystem::path training_images_folder_path = get_input_data_folder() / training_images_folder_name;
	std::cout << "Enumerating training images from " + training_images_folder_path.string() << "..." << std::endl;
	std::vector<std::pair<std::string, unsigned int> > ilsvrc2014id_localid_pair_list;
	{
		nnforge_regex folder_expression(ilsvrc2014id_pattern);
		nnforge_regex file_expression(training_image_filename_pattern);
		nnforge_cmatch what;
		for(boost::filesystem::directory_iterator it = boost::filesystem::directory_iterator(training_images_folder_path); it != boost::filesystem::directory_iterator(); ++it)
		{
			if (it->status().type() == boost::filesystem::directory_file)
			{
				boost::filesystem::path folder_path = it->path();
				std::string folder_name = folder_path.filename().string();
				if (nnforge_regex_match(folder_name, folder_expression))
				{
					const std::string& ilsvrc2014id = folder_name;
					unsigned int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id));
					for(boost::filesystem::directory_iterator it2 = boost::filesystem::directory_iterator(folder_path); it2 != boost::filesystem::directory_iterator(); ++it2)
					{
						if (it2->status().type() == boost::filesystem::regular_file)
						{
							boost::filesystem::path file_path = it2->path();
							std::string file_name = file_path.filename().string();
							if (nnforge_regex_search(file_name.c_str(), what, file_expression))
							{
								int localid = atol(std::string(what[2].first, what[2].second).c_str());
								ilsvrc2014id_localid_pair_list.push_back(std::make_pair(ilsvrc2014id, localid));
							}
						}
					}
				}
			}
		}
	}
	unsigned int total_training_image_count = static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size());
	std::cout << "Training images found: " << total_training_image_count << std::endl;

	nnforge::random_generator rnd;

	nnforge::varying_data_stream_writer_smart_ptr training_data_writer;
	{
		boost::filesystem::path training_file_path = get_working_data_folder() / training_randomized_data_filename;
		std::cout << "Writing randomized training data to " << training_file_path.string() << "..." << std::endl;
		nnforge_shared_ptr<std::ofstream> training_file(new boost::filesystem::ofstream(training_file_path, std::ios_base::out | std::ios_base::binary | std::ios_base::trunc));
		training_data_writer = nnforge::varying_data_stream_writer_smart_ptr(new nnforge::varying_data_stream_writer(
			training_file,
			static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size())));
	}

	for(unsigned int entry_written_count = 0; entry_written_count < total_training_image_count; ++entry_written_count)
	{
		nnforge_uniform_int_distribution<unsigned int> dist(0, static_cast<unsigned int>(ilsvrc2014id_localid_pair_list.size()) - 1);

		unsigned int index = dist(rnd);
		std::pair<std::string, unsigned int> ilsvrc2014id_localid_pair = ilsvrc2014id_localid_pair_list[index];
		ilsvrc2014id_localid_pair_list[index] = ilsvrc2014id_localid_pair_list[ilsvrc2014id_localid_pair_list.size() - 1];
		ilsvrc2014id_localid_pair_list.pop_back();

		std::string filename = (boost::format("%1%_%2%.JPEG") % ilsvrc2014id_localid_pair.first % ilsvrc2014id_localid_pair.second).str();
		boost::filesystem::path image_file_path = training_images_folder_path / ilsvrc2014id_localid_pair.first / filename;
		int class_id = get_classid_by_wnid(get_wnid_by_ilsvrc2014id(ilsvrc2014id_localid_pair.first));

		write_supervised_data(image_file_path, *training_data_writer, class_id);

		if (((entry_written_count + 1) % 100000) == 0)
			std::cout << (entry_written_count + 1) << " entries written" << std::endl;
	}
	std::cout << total_training_image_count << " entries written" << std::endl;
}