int main(int argc, char* argv[]) {
    /*
    CIF* cif = read_cif(std::string(argv[1]));
    std::cout << *cif;
    delete cif;
    */
    // Guard against missing arguments before indexing into argv.
    if (argc < 7) {
        std::cerr << "Usage: " << argv[0]
                  << " <bam_file> <fasta_dir> <fasta_file> <skip_soft_clipped> <max_read_length> <output_file>"
                  << std::endl;
        return 1;
    }
    std::string bam_file = argv[1];
    std::string fasta_dir = argv[2];
    std::string fasta_file = argv[3];
    bool skip_soft_clipped = std::string(argv[4]).compare("true") == 0;
    int32_t max_read_length = atoi(argv[5]);
    std::ofstream output;
    output.open(argv[6], std::ofstream::out);
    compute_confusion_matrix(max_read_length, bam_file, fasta_file, fasta_dir, skip_soft_clipped, output);
    output.close();
}
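// Example invocation (hypothetical binary and file names; the six positional
// arguments must appear in the order parsed above):
//   ./bam_confusion_matrix reads.bam /data/fasta reference.fasta true 150 confusion_matrix.txt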
int main(int argc, const char* argv[]) {
    try {
        // Parse command line arguments.
        TCLAP::CmdLine cmd("Depth RF trainer", ' ', "0.3");
        TCLAP::ValueArg<std::string> image_list_file_arg("f", "image-list-file", "File containing the names of image files", true, "", "string", cmd);
        TCLAP::ValueArg<int> num_of_classes_arg("n", "num-of-classes", "Number of classes in the data", true, 1, "int", cmd);
        TCLAP::SwitchArg print_confusion_matrix_switch("m", "conf-matrix", "Print confusion matrix", cmd, true);
        TCLAP::ValueArg<int> background_label_arg("l", "background-label", "Lower bound of background labels to be ignored", false, -1, "int", cmd);
        TCLAP::ValueArg<std::string> json_forest_file_arg("j", "json-forest-file", "JSON file where the trained forest should be saved", false, "forest.json", "string");
        TCLAP::ValueArg<std::string> binary_forest_file_arg("b", "binary-forest-file", "Binary file where the trained forest should be saved", false, "forest.bin", "string");
        TCLAP::ValueArg<std::string> config_file_arg("c", "config", "JSON file with training parameters", false, "", "string", cmd);
#if AIT_MULTI_THREADING
        TCLAP::ValueArg<int> num_of_threads_arg("t", "threads", "Number of threads to use", false, -1, "int", cmd);
#endif
        cmd.xorAdd(json_forest_file_arg, binary_forest_file_arg);
        cmd.parse(argc, argv);

        const int num_of_classes = num_of_classes_arg.getValue();
        bool print_confusion_matrix = print_confusion_matrix_switch.getValue();
        const std::string image_list_file = image_list_file_arg.getValue();

        // Initialize training and weak-learner parameters to defaults or load them from file.
        ForestTrainerT::ParametersT training_parameters;
        WeakLearnerT::ParametersT weak_learner_parameters;
        if (config_file_arg.isSet()) {
            ait::log_info(false) << "Reading config file " << config_file_arg.getValue() << "... " << std::flush;
            std::ifstream ifile_config(config_file_arg.getValue());
            cereal::JSONInputArchive iarchive(ifile_config);
            iarchive(cereal::make_nvp("training_parameters", training_parameters));
            iarchive(cereal::make_nvp("weak_learner_parameters", weak_learner_parameters));
            ait::log_info(false) << " Done." << std::endl;
        }
#if AIT_MULTI_THREADING
        if (num_of_threads_arg.isSet()) {
            training_parameters.num_of_threads = num_of_threads_arg.getValue();
        }
#endif

        // Read image file list.
        ait::log_info(false) << "Reading image list ... " << std::flush;
        std::vector<std::tuple<std::string, std::string>> image_list;
        std::ifstream ifile(image_list_file);
        if (!ifile.good()) {
            throw std::runtime_error("Unable to open image list file");
        }
        ait::CSVReader<std::string> csv_reader(ifile);
        for (auto it = csv_reader.begin(); it != csv_reader.end(); ++it) {
            if (it->size() != 2) {
                cmd.getOutput()->usage(cmd);
                ait::log_error() << "Image list file should contain two columns with the data and label filenames.";
                exit(-1);
            }
            const std::string& data_filename = (*it)[0];
            const std::string& label_filename = (*it)[1];
            boost::filesystem::path data_path = boost::filesystem::path(data_filename);
            boost::filesystem::path label_path = boost::filesystem::path(label_filename);
            // Resolve relative paths against the directory of the image list file.
            if (!data_path.is_absolute()) {
                data_path = boost::filesystem::path(image_list_file).parent_path();
                data_path /= data_filename;
            }
            if (!label_path.is_absolute()) {
                label_path = boost::filesystem::path(image_list_file).parent_path();
                label_path /= label_filename;
            }
            image_list.push_back(std::make_tuple(data_path.string(), label_path.string()));
        }
        ait::log_info(false) << " Done." << std::endl;

        // TODO: Ensure that label images do not contain values > num_of_classes except for background pixels.
        // Alternative approach: test the samples directly below.

        // Set the lower bound for background pixel labels.
        ait::label_type background_label;
        if (background_label_arg.isSet()) {
            background_label = background_label_arg.getValue();
        } else {
            background_label = num_of_classes;
        }
        weak_learner_parameters.background_label = background_label;

        // Create weak learner and trainer.
        StatisticsT::Factory statistics_factory(num_of_classes);
        WeakLearnerT iwl(weak_learner_parameters, statistics_factory);
        ForestTrainerT trainer(iwl, training_parameters);
        SampleProviderT sample_provider(image_list, weak_learner_parameters);
        BaggingWrapperT bagging_wrapper(trainer, sample_provider);

#ifdef AIT_TESTING
        RandomEngineT rnd_engine(11);
#else
        std::random_device rnd_device;
        ait::log_info() << "rnd(): " << rnd_device();
        RandomEngineT rnd_engine(rnd_device());
#endif

        // Train a forest and time it.
        auto start_time = std::chrono::high_resolution_clock::now();
        // TODO
        // ForestTrainerT::ForestT forest = bagging_wrapper.train_forest(rnd_engine);
        // TODO: Testing all samples for comparison with depth_trainer.
        sample_provider.clear_samples();
        for (int i = 0; i < image_list.size(); ++i) {
            sample_provider.load_samples_from_image(i, rnd_engine);
        }
        SampleIteratorT samples_start = sample_provider.get_samples_begin();
        SampleIteratorT samples_end = sample_provider.get_samples_end();
        ait::log_info() << "Starting training ...";
        ForestTrainerT::ForestT forest = trainer.train_forest(samples_start, samples_end, rnd_engine);
        auto stop_time = std::chrono::high_resolution_clock::now();
        auto duration = stop_time - start_time;
        auto period = std::chrono::high_resolution_clock::period();
        double elapsed_seconds = duration.count() * period.num / static_cast<double>(period.den);
        ait::log_info() << "Done.";
        ait::log_info() << "Running time: " << elapsed_seconds;

        // Optionally: Serialize forest to JSON file.
        if (json_forest_file_arg.isSet()) {
            ait::log_info(false) << "Writing json forest file " << json_forest_file_arg.getValue() << "... " << std::flush;
            std::ofstream ofile(json_forest_file_arg.getValue());
            cereal::JSONOutputArchive oarchive(ofile);
            oarchive(cereal::make_nvp("forest", forest));
            ait::log_info(false) << " Done." << std::endl;
        // Optionally: Serialize forest to binary file.
        } else if (binary_forest_file_arg.isSet()) {
            ait::log_info(false) << "Writing binary forest file " << binary_forest_file_arg.getValue() << "... " << std::flush;
            std::ofstream ofile(binary_forest_file_arg.getValue(), std::ios_base::binary);
            cereal::BinaryOutputArchive oarchive(ofile);
            oarchive(cereal::make_nvp("forest", forest));
            ait::log_info(false) << " Done." << std::endl;
        } else {
            throw std::runtime_error("This should never happen. Either a JSON or a binary forest file has to be specified!");
        }

        // Optionally: Compute some stats and print them.
        if (print_confusion_matrix) {
            ait::log_info(false) << "Creating samples for testing ... " << std::flush;
            sample_provider.clear_samples();
            for (int i = 0; i < image_list.size(); ++i) {
                sample_provider.load_samples_from_image(i, rnd_engine);
            }
            SampleIteratorT samples_start = sample_provider.get_samples_begin();
            SampleIteratorT samples_end = sample_provider.get_samples_end();
            ait::log_info(false) << " Done." << std::endl;

            // Count the samples per class.
            std::vector<ait::size_type> sample_counts(num_of_classes, 0);
            for (auto sample_it = samples_start; sample_it != samples_end; sample_it++) {
                ++sample_counts[sample_it->get_label()];
            }
            auto logger = ait::log_info(true);
            logger << "Sample counts: ";
            for (int c = 0; c < num_of_classes; ++c) {
                if (c > 0) {
                    logger << ", ";
                }
                logger << "class " << c << ": " << sample_counts[c];
            }
            logger.close();

            // For each tree, extract the leaf node index of each sample.
            std::vector<std::vector<ait::size_type>> forest_leaf_indices = forest.evaluate(samples_start, samples_end);

            // Count per-tree prediction matches (argmax of the leaf histogram vs. the ground-truth label).
            int match = 0;
            int no_match = 0;
            for (auto tree_it = forest.cbegin(); tree_it != forest.cend(); ++tree_it) {
                for (auto sample_it = samples_start; sample_it != samples_end; sample_it++) {
                    const auto& node_it = tree_it->cbegin() + (forest_leaf_indices[tree_it - forest.cbegin()][sample_it - samples_start]);
                    const auto& statistics = node_it->get_statistics();
                    auto max_it = std::max_element(statistics.get_histogram().cbegin(), statistics.get_histogram().cend());
                    auto label = max_it - statistics.get_histogram().cbegin();
                    if (label == sample_it->get_label()) {
                        match++;
                    } else {
                        no_match++;
                    }
                }
            }
            ait::log_info() << "Match: " << match << ", no match: " << no_match;

            // Compute confusion matrix.
            auto forest_utils = ait::make_forest_utils(forest);
            auto confusion_matrix = forest_utils.compute_confusion_matrix(samples_start, samples_end);
            ait::log_info() << "Confusion matrix:" << std::endl << confusion_matrix;
            auto norm_confusion_matrix = ait::EvaluationUtils::normalize_confusion_matrix(confusion_matrix);
            ait::log_info() << "Normalized confusion matrix:" << std::endl << norm_confusion_matrix;
            ait::log_info() << "Diagonal of normalized confusion matrix:" << std::endl << norm_confusion_matrix.diagonal();

            // Compute per-frame confusion matrix.
            ait::log_info() << "Computing per-frame confusion matrix.";
            using ConfusionMatrixType = typename decltype(forest_utils)::MatrixType;
            ConfusionMatrixType per_frame_confusion_matrix(num_of_classes, num_of_classes);
            per_frame_confusion_matrix.setZero();
            WeakLearnerT::ParametersT full_parameters(weak_learner_parameters);
            // Modify parameters to retrieve all pixels per image.
            full_parameters.samples_per_image_fraction = 1.0;
            SampleProviderT full_sample_provider(image_list, full_parameters);
            for (int i = 0; i < image_list.size(); ++i) {
                full_sample_provider.clear_samples();
                full_sample_provider.load_samples_from_image(i, rnd_engine);
                samples_start = full_sample_provider.get_samples_begin();
                samples_end = full_sample_provider.get_samples_end();
                forest_utils.update_confusion_matrix(per_frame_confusion_matrix, samples_start, samples_end);
            }
            ait::log_info() << "Per-frame confusion matrix:" << std::endl << per_frame_confusion_matrix;
            ConfusionMatrixType per_frame_norm_confusion_matrix = ait::EvaluationUtils::normalize_confusion_matrix(per_frame_confusion_matrix);
            ait::log_info() << "Normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix;
            ait::log_info() << "Diagonal of normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix.diagonal();
            ait::log_info() << "Mean of diagonal of normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix.diagonal().mean();
        }
    } catch (const std::runtime_error& error) {
        std::cerr << "Runtime exception occurred" << std::endl;
        std::cerr << error.what() << std::endl;
    }
    return 0;
}
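// Example invocation (hypothetical binary and file names; -f, -n and exactly one
// of -j/-b are required, -c is optional, and -t is only available when compiled
// with AIT_MULTI_THREADING):
//   ./depth_trainer -f image_list.csv -n 5 -c config.json -j forest.json -t 4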