void merge_metadata_files ( const command_line_parser& parser ) { image_dataset_metadata::dataset src, dest; load_image_dataset_metadata(src, parser.option("add").argument(0)); load_image_dataset_metadata(dest, parser.option("add").argument(1)); std::map<string,image_dataset_metadata::image> merged_data; for (unsigned long i = 0; i < dest.images.size(); ++i) merged_data[dest.images[i].filename] = dest.images[i]; // now add in the src data and overwrite anything if there are duplicate entries. for (unsigned long i = 0; i < src.images.size(); ++i) merged_data[src.images[i].filename] = src.images[i]; // copy merged data into dest dest.images.clear(); for (std::map<string,image_dataset_metadata::image>::const_iterator i = merged_data.begin(); i != merged_data.end(); ++i) { dest.images.push_back(i->second); } save_image_dataset_metadata(dest, "merged.xml"); }
void flip_dataset(const command_line_parser& parser) { image_dataset_metadata::dataset metadata, orig_metadata; string datasource; if (parser.option("flip")) datasource = parser.option("flip").argument(); else datasource = parser.option("flip-basic").argument(); load_image_dataset_metadata(metadata,datasource); orig_metadata = metadata; // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. set_current_dir(get_parent_directory(file(datasource))); const string metadata_filename = get_parent_directory(file(datasource)).full_name() + directory::get_separator() + "flipped_" + file(datasource).name(); array2d<rgb_pixel> img, temp; for (unsigned long i = 0; i < metadata.images.size(); ++i) { file f(metadata.images[i].filename); string filename = get_parent_directory(f).full_name() + directory::get_separator() + "flipped_" + to_png_name(f.name()); load_image(img, metadata.images[i].filename); flip_image_left_right(img, temp); if (parser.option("jpg")) { filename = to_jpg_name(filename); save_jpeg(temp, filename,JPEG_QUALITY); } else { save_png(temp, filename); } for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j) { metadata.images[i].boxes[j].rect = impl::flip_rect_left_right(metadata.images[i].boxes[j].rect, get_rect(img)); // flip all the object parts for (auto& part : metadata.images[i].boxes[j].parts) { part.second = impl::flip_rect_left_right(rectangle(part.second,part.second), get_rect(img)).tl_corner(); } } metadata.images[i].filename = filename; } if (!parser.option("flip-basic")) make_part_labeling_match_target_dataset(orig_metadata, metadata); save_image_dataset_metadata(metadata, metadata_filename); }
// Returns the number of MLP training iterations: the argument of the
// --mlp-num-iterations option read as an int when the option is present,
// otherwise 5000.  dense_samples is not used.
static double
get_mlp_num_iterations (
    command_line_parser& parser,
    std::vector<dense_sample_type>& dense_samples
)
{
    if (!parser.option ("mlp-num-iterations"))
        return 5000;

    // Go through an int so the argument is read as an integer before it is
    // widened to the double return type.
    int requested_iterations = 0;
    requested_iterations = sa = parser.option("mlp-num-iterations").argument();
    return requested_iterations;
}
// Returns the number of hidden units for the MLP: the argument of the
// --mlp-hidden-units option read as an int when the option is present,
// otherwise 5.  dense_samples is not used.
static double
get_mlp_hidden_units (
    command_line_parser& parser,
    std::vector<dense_sample_type>& dense_samples
)
{
    if (!parser.option ("mlp-hidden-units"))
        return 5;

    // Go through an int so the argument is read as an integer before it is
    // widened to the double return type.
    int requested_units = 0;
    requested_units = sa = parser.option("mlp-hidden-units").argument();
    return requested_units;
}
static void krr_rbk_test ( command_line_parser& parser, std::vector<dense_sample_type>& dense_samples, std::vector<double>& labels ) { typedef radial_basis_kernel<dense_sample_type> kernel_type; krr_trainer<kernel_type> trainer; option_range gamma_range; double best_gamma = DBL_MAX; float best_loo = FLT_MAX; get_rbk_gamma (parser, dense_samples, gamma_range); for (float gamma = gamma_range.get_min_value(); gamma <= gamma_range.get_max_value(); gamma = gamma_range.get_next_value (gamma)) { // LOO cross validation std::vector<double> loo_values; if (parser.option("verbose")) { trainer.set_search_lambdas(logspace(-9, 4, 100)); trainer.be_verbose(); } trainer.set_kernel (kernel_type (gamma)); trainer.train (dense_samples, labels, loo_values); const double loo_error = mean_squared_error(loo_values, labels); if (loo_error < best_loo) { best_loo = loo_error; best_gamma = gamma; } printf ("10^%f %9.6f\n", log10(gamma), loo_error); } printf ("Best result: gamma=10^%f (%g), loo_error=%9.6f\n", log10(best_gamma), best_gamma, best_loo); if (parser.option("train-best")) { printf ("Training network with best parameters\n"); trainer.set_kernel (kernel_type (best_gamma)); decision_function<kernel_type> best_network = trainer.train (dense_samples, labels); std::ofstream fout (parser.option("train-best").argument().c_str(), std::ios::binary); serialize (best_network, fout); fout.close(); } }
int tile_dataset(const command_line_parser& parser) { if (parser.number_of_arguments() != 1) { cerr << "The --tile option requires you to give one XML file on the command line." << endl; return EXIT_FAILURE; } string out_image = parser.option("tile").argument(); string ext = right_substr(out_image,"."); if (ext != "png" && ext != "jpg") { cerr << "The output image file must have either .png or .jpg extension." << endl; return EXIT_FAILURE; } const unsigned long chip_size = get_option(parser, "size", 8000); dlib::image_dataset_metadata::dataset data; load_image_dataset_metadata(data, parser[0]); locally_change_current_dir chdir(get_parent_directory(file(parser[0]))); dlib::array<array2d<rgb_pixel> > images; console_progress_indicator pbar(data.images.size()); for (unsigned long i = 0; i < data.images.size(); ++i) { // don't even bother loading images that don't have objects. if (data.images[i].boxes.size() == 0) continue; pbar.print_status(i); array2d<rgb_pixel> img; load_image(img, data.images[i].filename); // figure out what chips we want to take from this image std::vector<chip_details> dets; for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { if (data.images[i].boxes[j].ignore) continue; rectangle rect = data.images[i].boxes[j].rect; dets.push_back(chip_details(rect, chip_size)); } // Now grab all those chips at once. dlib::array<array2d<rgb_pixel> > chips; extract_image_chips(img, dets, chips); // and put the chips into the output. for (unsigned long j = 0; j < chips.size(); ++j) images.push_back(chips[j]); } chdir.revert(); if (ext == "png") save_png(tile_images(images), out_image); else save_jpeg(tile_images(images), out_image); return EXIT_SUCCESS; }
void create_new_dataset ( const command_line_parser& parser ) { using namespace dlib::image_dataset_metadata; const std::string filename = parser.option("c").argument(); // make sure the file exists so we can use the get_parent_directory() command to // figure out it's parent directory. make_empty_file(filename); const std::string parent_dir = get_parent_directory(file(filename)); unsigned long depth = 0; if (parser.option("r")) depth = 30; dataset meta; meta.name = "imglab dataset"; meta.comment = "Created by imglab tool."; for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) { try { const string temp = strip_path(file(parser[i]), parent_dir); meta.images.push_back(image(temp)); } catch (dlib::file::file_not_found&) { // then parser[i] should be a directory std::vector<file> files = get_files_in_directory_tree(parser[i], match_endings(".png .PNG .jpeg .JPEG .jpg .JPG .bmp .BMP .dng .DNG"), depth); sort(files.begin(), files.end()); for (unsigned long j = 0; j < files.size(); ++j) { meta.images.push_back(image(strip_path(files[j], parent_dir))); } } } save_image_dataset_metadata(meta, filename); }
// Returns the epsilon-insensitivity to use for support vector regression: the
// argument of the --svr-epsilon-insensitivity option when it is present,
// otherwise 0.001.  dense_samples is not used.
//
// Epsilon-insensitive regression means we do regression but stop trying to fit
// a data point once it is "close enough" to its target value, and this value is
// what defines "close enough".  The default says we are happy once the
// regression function gets within 0.001 of the target value.
static double
get_svr_epsilon_insensitivity (
    command_line_parser& parser,
    std::vector<dense_sample_type>& dense_samples
)
{
    if (!parser.option ("svr-epsilon-insensitivity"))
        return 0.001;

    double requested_epsilon = 0;
    requested_epsilon = sa = parser.option("svr-epsilon-insensitivity").argument();
    return requested_epsilon;
}
void convert_idl( const command_line_parser& parser ) { cout << "Convert from IDL annotation format..." << endl; dlib::image_dataset_metadata::dataset dataset; for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) { parse_annotation_file(parser[i], dataset); } const std::string filename = parser.option("c").argument(); save_image_dataset_metadata(dataset, filename); }
int split_dataset ( const command_line_parser& parser ) { if (parser.number_of_arguments() != 1) { cerr << "The --split option requires you to give one XML file on the command line." << endl; return EXIT_FAILURE; } const std::string label = parser.option("split").argument(); dlib::image_dataset_metadata::dataset data, data_with, data_without; load_image_dataset_metadata(data, parser[0]); data_with.name = data.name; data_with.comment = data.comment; data_without.name = data.name; data_without.comment = data.comment; for (unsigned long i = 0; i < data.images.size(); ++i) { auto&& temp = data.images[i]; bool has_the_label = false; // check for the label we are looking for for (unsigned long j = 0; j < temp.boxes.size(); ++j) { if (temp.boxes[j].label == label) { has_the_label = true; break; } } if (has_the_label) data_with.images.push_back(temp); else data_without.images.push_back(temp); } save_image_dataset_metadata(data_with, left_substr(parser[0],".") + "_with_"+label + ".xml"); save_image_dataset_metadata(data_without, left_substr(parser[0],".") + "_without_"+label + ".xml"); return EXIT_SUCCESS; }
void flip_dataset(const command_line_parser& parser) { #ifdef DLIB_PNG_SUPPORT image_dataset_metadata::dataset metadata; const string datasource = parser.option("flip").argument(); load_image_dataset_metadata(metadata,datasource); // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. set_current_dir(get_parent_directory(file(datasource))); const string metadata_filename = get_parent_directory(file(datasource)).full_name() + directory::get_separator() + "flipped_" + file(datasource).name(); array2d<rgb_pixel> img, temp; for (unsigned long i = 0; i < metadata.images.size(); ++i) { file f(metadata.images[i].filename); const string filename = get_parent_directory(f).full_name() + directory::get_separator() + "flipped_" + to_png_name(f.name()); load_image(img, metadata.images[i].filename); flip_image_left_right(img, temp); save_png(temp, filename); for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j) { metadata.images[i].boxes[j].rect = impl::flip_rect_left_right(metadata.images[i].boxes[j].rect, get_rect(img)); // flip all the object parts std::map<std::string,point>::iterator k; for (k = metadata.images[i].boxes[j].parts.begin(); k != metadata.images[i].boxes[j].parts.end(); ++k) { k->second = impl::flip_rect_left_right(rectangle(k->second,k->second), get_rect(img)).tl_corner(); } } metadata.images[i].filename = filename; } save_image_dataset_metadata(metadata, metadata_filename); #else throw dlib::error("imglab must be compiled with libpng if you want to use the --flip option."); #endif }
void convert_pascal_xml( const command_line_parser& parser ) { cout << "Convert from PASCAL XML annotation format..." << endl; dlib::image_dataset_metadata::dataset dataset; std::string name; dlib::image_dataset_metadata::image img; const std::string filename = parser.option("c").argument(); // make sure the file exists so we can use the get_parent_directory() command to // figure out it's parent directory. make_empty_file(filename); const std::string parent_dir = get_parent_directory(file(filename)).full_name(); for (unsigned long i = 0; i < parser.number_of_arguments(); ++i) { try { parse_annotation_file(parser[i], img, name); const string root = get_parent_directory(get_parent_directory(file(parser[i]))).full_name(); const string img_path = root + directory::get_separator() + "JPEGImages" + directory::get_separator(); dataset.name = name; img.filename = strip_path(img_path + img.filename, parent_dir); dataset.images.push_back(img); } catch (exception& e) { cout << "Error while processing file " << parser[i] << endl << endl; throw; } } save_image_dataset_metadata(dataset, filename); }
static void parse_args (command_line_parser& parser, int argc, char* argv[]) { try { // Algorithm-independent options parser.add_option ("a", "Choose the learning algorithm: {krls,krr,mlp,svr}.",1); parser.add_option ("h","Display this help message."); parser.add_option ("help","Display this help message."); parser.add_option ("k", "Learning kernel (for krls,krr,svr methods): {lin,rbk}.",1); parser.add_option ("in","A libsvm-formatted file to test.",1); parser.add_option ("normalize", "Normalize the sample inputs to zero-mean unit variance."); parser.add_option ("train-best", "Train and save a network using best parameters", 1); parser.set_group_name("Algorithm Specific Options"); // Algorithm-specific options parser.add_option ("rbk-gamma", "Width of radial basis kernels: {float}.",1); parser.add_option ("krls-tolerance", "Numerical tolerance of krls linear dependency test: {float}.",1); parser.add_option ("mlp-hidden-units", "Number of hidden units in mlp: {integer}.",1); parser.add_option ("mlp-num-iterations", "Number of epochs to train the mlp: {integer}.",1); parser.add_option ("svr-c", "SVR regularization parameter \"C\": " "{float}.",1); parser.add_option ("svr-epsilon-insensitivity", "SVR fitting tolerance parameter: " "{float}.",1); parser.add_option ("verbose", "Use verbose trainers"); // Parse the command line arguments parser.parse(argc,argv); // Check that options aren't given multiple times const char* one_time_opts[] = {"a", "h", "help", "in"}; parser.check_one_time_options(one_time_opts); const char* valid_kernels[] = {"lin", "rbk"}; const char* valid_algs[] = {"krls", "krr", "mlp", "svr"}; parser.check_option_arg_range("a", valid_algs); parser.check_option_arg_range("k", valid_kernels); parser.check_option_arg_range("rbk-gamma", 1e-200, 1e200); parser.check_option_arg_range("krls-tolerance", 1e-200, 1e200); parser.check_option_arg_range("mlp-hidden-units", 1, 10000000); parser.check_option_arg_range("mlp-num-iterations", 1, 10000000); 
parser.check_option_arg_range("svr-c", 1e-200, 1e200); parser.check_option_arg_range("svr-epsilon-insensitivity", 1e-200, 1e200); // Check if the -h option was given if (parser.option("h") || parser.option("help")) { std::cout << "Usage: mltool [-a algorithm] --in input_file\n"; parser.print_options(std::cout); std::cout << std::endl; exit (0); } // Check that an input file was given if (!parser.option("in")) { std::cout << "Error in command line:\n" << "You must specify an input file with the --in option.\n" << "\nTry the -h option for more information\n"; exit (0); } } catch (std::exception& e) { // Catch cmd_line_parse_error exceptions and print usage message. std::cout << e.what() << std::endl; exit (1); } catch (...) { std::cout << "Some error occurred" << std::endl; } }
int resample_dataset(const command_line_parser& parser) { if (parser.number_of_arguments() != 1) { cerr << "The --resample option requires you to give one XML file on the command line." << endl; return EXIT_FAILURE; } const size_t obj_size = get_option(parser,"cropped-object-size",100*100); const double margin_scale = get_option(parser,"crop-size",2.5); // cropped image will be this times wider than the object. const unsigned long min_object_size = get_option(parser,"min-object-size",1); const bool one_object_per_image = parser.option("one-object-per-image"); dlib::image_dataset_metadata::dataset data, resampled_data; std::ostringstream sout; sout << "\nThe --resample parameters which generated this dataset were:" << endl; sout << " cropped-object-size: "<< obj_size << endl; sout << " crop-size: "<< margin_scale << endl; sout << " min-object-size: "<< min_object_size << endl; if (one_object_per_image) sout << " one_object_per_image: true" << endl; resampled_data.comment = data.comment + sout.str(); resampled_data.name = data.name + " RESAMPLED"; load_image_dataset_metadata(data, parser[0]); locally_change_current_dir chdir(get_parent_directory(file(parser[0]))); dlib::rand rnd; const size_t image_size = std::round(std::sqrt(obj_size*margin_scale*margin_scale)); const chip_dims cdims(image_size, image_size); console_progress_indicator pbar(data.images.size()); for (unsigned long i = 0; i < data.images.size(); ++i) { // don't even bother loading images that don't have objects. 
if (data.images[i].boxes.size() == 0) continue; pbar.print_status(i); array2d<rgb_pixel> img, chip; load_image(img, data.images[i].filename); // figure out what chips we want to take from this image for (unsigned long j = 0; j < data.images[i].boxes.size(); ++j) { const rectangle rect = data.images[i].boxes[j].rect; if (data.images[i].boxes[j].ignore || rect.area() < min_object_size) continue; const auto max_dim = std::max(rect.width(), rect.height()); const double rand_scale_perturb = 1 - 0.3*(rnd.get_random_double()-0.5); const rectangle crop_rect = centered_rect(rect, max_dim*margin_scale*rand_scale_perturb, max_dim*margin_scale*rand_scale_perturb); const rectangle_transform tform = get_mapping_to_chip(chip_details(crop_rect, cdims)); extract_image_chip(img, chip_details(crop_rect, cdims), chip); image_dataset_metadata::image dimg; // Now transform the boxes to the crop and also mark them as ignored if they // have already been cropped out or are outside the crop. for (size_t k = 0; k < data.images[i].boxes.size(); ++k) { image_dataset_metadata::box box = data.images[i].boxes[k]; // ignore boxes outside the cropped image if (crop_rect.intersect(box.rect).area() == 0) continue; // mark boxes we include in the crop as ignored. Also mark boxes that // aren't totally within the crop as ignored. if (crop_rect.contains(grow_rect(box.rect,10)) && (!one_object_per_image || k==j)) data.images[i].boxes[k].ignore = true; else box.ignore = true; if (box.rect.area() < min_object_size) box.ignore = true; box.rect = tform(box.rect); for (auto&& p : box.parts) p.second = tform.get_tform()(p.second); dimg.boxes.push_back(box); } // Put a 64bit hash of the image data into the name to make sure there are no // file name conflicts. 
std::ostringstream sout; sout << hex << murmur_hash3_128bit(&chip[0][0], chip.size()*sizeof(chip[0][0])).second; dimg.filename = data.images[i].filename + "_RESAMPLED_"+sout.str()+".png"; if (parser.option("jpg")) { dimg.filename = to_jpg_name(dimg.filename); save_jpeg(chip,dimg.filename, JPEG_QUALITY); } else { save_png(chip,dimg.filename); } resampled_data.images.push_back(dimg); } } save_image_dataset_metadata(resampled_data, parser[0] + ".RESAMPLED.xml"); return EXIT_SUCCESS; }
void rotate_dataset(const command_line_parser& parser) { image_dataset_metadata::dataset metadata; const string datasource = parser[0]; load_image_dataset_metadata(metadata,datasource); double angle = get_option(parser, "rotate", 0); // Set the current directory to be the one that contains the // metadata file. We do this because the file might contain // file paths which are relative to this folder. set_current_dir(get_parent_directory(file(datasource))); const string file_prefix = "rotated_"+ cast_to_string(angle) + "_"; const string metadata_filename = get_parent_directory(file(datasource)).full_name() + directory::get_separator() + file_prefix + file(datasource).name(); array2d<rgb_pixel> img, temp; for (unsigned long i = 0; i < metadata.images.size(); ++i) { file f(metadata.images[i].filename); string filename = get_parent_directory(f).full_name() + directory::get_separator() + file_prefix + to_png_name(f.name()); load_image(img, metadata.images[i].filename); const point_transform_affine tran = rotate_image(img, temp, angle*pi/180); if (parser.option("jpg")) { filename = to_jpg_name(filename); save_jpeg(temp, filename,JPEG_QUALITY); } else { save_png(temp, filename); } for (unsigned long j = 0; j < metadata.images[i].boxes.size(); ++j) { const rectangle rect = metadata.images[i].boxes[j].rect; rectangle newrect; newrect += tran(rect.tl_corner()); newrect += tran(rect.tr_corner()); newrect += tran(rect.bl_corner()); newrect += tran(rect.br_corner()); // now make newrect have the same area as the starting rect. 
double ratio = std::sqrt(rect.area()/(double)newrect.area()); newrect = centered_rect(newrect, newrect.width()*ratio, newrect.height()*ratio); metadata.images[i].boxes[j].rect = newrect; // rotate all the object parts std::map<std::string,point>::iterator k; for (k = metadata.images[i].boxes[j].parts.begin(); k != metadata.images[i].boxes[j].parts.end(); ++k) { k->second = tran(k->second); } } metadata.images[i].filename = filename; } save_image_dataset_metadata(metadata, metadata_filename); }