Пример #1
0
// Reads the reference csv file one line at a time and checks that
//  - every line holds exactly five single-character tokens,
//  - the tokens continue the sequence 'a', 'b', 'c', ... across lines,
//  - HasMoreLines() stays true until the fifth line has been read.
TEST(CsvReader, TestReadLine) {
  const int kTokensPerLine = 5;
  const int kLastLineIndex = 4;

  CsvReader csv_reader(csv_read_file);
  std::vector<std::string> tokenized_line;

  int line_number = 0;
  EXPECT_TRUE(csv_reader.HasMoreLines());
  while (csv_reader.HasMoreLines()) {
    // Fatal on failure: after a failed read the tokens below would be stale.
    ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));

    // Fatal on failure: the loop below indexes tokens 0..4 unconditionally.
    ASSERT_EQ(static_cast<size_t>(kTokensPerLine), tokenized_line.size());

    for (int ii = 0; ii < kTokensPerLine; ++ii) {
      // Fatal on failure: the first character is read right afterwards.
      ASSERT_EQ(1u, tokenized_line[ii].size());
      const char expected =
          static_cast<char>('a' + (ii + line_number * kTokensPerLine));
      EXPECT_EQ(expected, tokenized_line[ii][0]);
    }

    // Only the last expected line may exhaust the reader.
    if (line_number < kLastLineIndex) {
      EXPECT_TRUE(csv_reader.HasMoreLines());
    } else {
      EXPECT_FALSE(csv_reader.HasMoreLines());
    }

    ++line_number;
  }
  EXPECT_FALSE(csv_reader.HasMoreLines());
}
Пример #2
0
// Writes four lines of different element types (int, double, string, Eigen
// vector) to a csv file, reads them back and checks that every token
// round-trips to the value that was written.
TEST(CsvWriter, TestWriteLine) {
  // Deletes the scratch file when the test body exits, including early exits
  // caused by a failed ASSERT_*. Declared first so that it is destroyed last,
  // i.e. after the writer and reader objects below have gone out of scope.
  struct ScratchFileRemover {
    ~ScratchFileRemover() { std::remove(csv_write_file.c_str()); }
  } scratch_file_remover;

  // Write a bunch of lines to a csv file.
  CsvWriter csv_writer(csv_write_file);
  EXPECT_TRUE(csv_writer.IsOpen());

  std::vector<int> line1 = {1, 2, 3, 4, 5};
  std::vector<double> line2 = {6.0, 7.0, 8.0, 9.0, 10.0};
  std::vector<std::string> line3 = {"a", "b", "c", "d", "e"};
  Eigen::Matrix<double, 5, 1> line4;
  line4 << 11.0, 12.0, 13.0, 14.0, 15.0;

  EXPECT_TRUE(csv_writer.WriteLine(line1));
  EXPECT_TRUE(csv_writer.WriteLine(line2));
  EXPECT_TRUE(csv_writer.WriteLine(line3));
  EXPECT_TRUE(csv_writer.WriteLine(line4));
  EXPECT_TRUE(csv_writer.Close());

  // Read back the lines from the csv file.
  CsvReader csv_reader(csv_write_file);
  EXPECT_TRUE(csv_reader.IsOpen());
  EXPECT_TRUE(csv_reader.HasMoreLines());

  // The size checks below are fatal because each loop indexes the written
  // line by the number of tokens that were read back.
  std::vector<std::string> tokenized_line;

  // First line.
  ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));
  ASSERT_EQ(line1.size(), tokenized_line.size());
  for (size_t ii = 0; ii < tokenized_line.size(); ++ii) {
    EXPECT_EQ(line1[ii], std::stoi(tokenized_line[ii]));
  }

  // Second line.
  ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));
  ASSERT_EQ(line2.size(), tokenized_line.size());
  for (size_t ii = 0; ii < tokenized_line.size(); ++ii) {
    EXPECT_EQ(line2[ii], std::stod(tokenized_line[ii]));
  }

  // Third line.
  ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));
  ASSERT_EQ(line3.size(), tokenized_line.size());
  for (size_t ii = 0; ii < tokenized_line.size(); ++ii) {
    EXPECT_EQ(line3[ii], tokenized_line[ii]);
  }

  // Fourth line.
  ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));
  ASSERT_EQ(static_cast<size_t>(line4.size()), tokenized_line.size());
  for (size_t ii = 0; ii < tokenized_line.size(); ++ii) {
    EXPECT_EQ(line4(ii), std::stod(tokenized_line[ii]));
  }
}
Пример #3
0
// Reads the whole reference csv file in one call and checks that it holds
// five lines of five single-character tokens each, continuing the sequence
// 'a', 'b', 'c', ... across lines.
TEST(CsvReader, TestReadFile) {
  const size_t kNumLines = 5;
  const size_t kTokensPerLine = 5;

  CsvReader csv_reader(csv_read_file);
  std::vector<std::vector<std::string>> tokenized_lines;

  EXPECT_TRUE(csv_reader.HasMoreLines());
  EXPECT_TRUE(csv_reader.ReadFile(&tokenized_lines));
  EXPECT_FALSE(csv_reader.HasMoreLines());
  ASSERT_EQ(kNumLines, tokenized_lines.size());

  for (size_t line = 0; line < tokenized_lines.size(); ++line) {
    const std::vector<std::string>& tokens = tokenized_lines[line];

    // The expected-character formula below assumes five tokens per line, so
    // a short or long line must fail instead of being silently accepted.
    ASSERT_EQ(kTokensPerLine, tokens.size());

    for (size_t token = 0; token < tokens.size(); ++token) {
      // Fatal on failure: the first character is read right afterwards.
      ASSERT_EQ(1u, tokens[token].size());

      const char expected =
          static_cast<char>('a' + (token + line * kTokensPerLine));
      EXPECT_EQ(expected, tokens[token][0]);
    }
  }
}
Пример #4
0
// Writes 1000 random 100-dimensional Eigen vectors to a csv file with a
// single WriteLines() call, reads them back line by line and checks that
// every value matches the original to within 1e-4.
TEST(CsvWriter, TestWriteLines) {
  // Deletes the scratch file when the test body exits, including early exits
  // caused by a failed ASSERT_*. Declared first so that it is destroyed last,
  // i.e. after the writer and reader objects below have gone out of scope.
  struct ScratchFileRemover {
    ~ScratchFileRemover() { std::remove(csv_write_file.c_str()); }
  } scratch_file_remover;

  const size_t kNumLines = 1000;
  const int kDimension = 100;

  // Write a bunch of Eigen vectors as lines to a csv file.
  CsvWriter csv_writer(csv_write_file);
  EXPECT_TRUE(csv_writer.IsOpen());

  // Make 1000 random 100-dimensional vectors.
  std::vector<Eigen::VectorXd> lines;
  lines.reserve(kNumLines);
  for (size_t ii = 0; ii < kNumLines; ++ii) {
    lines.push_back(Eigen::VectorXd::Random(kDimension));
  }

  // Store the vectors in the csv file.
  EXPECT_TRUE(csv_writer.WriteLines(lines));
  EXPECT_TRUE(csv_writer.Close());

  // Read back the lines from the csv file.
  CsvReader csv_reader(csv_write_file);
  EXPECT_TRUE(csv_reader.IsOpen());
  EXPECT_TRUE(csv_reader.HasMoreLines());

  // Make sure all lines match the original vector.
  size_t line_number = 0;
  std::vector<std::string> tokenized_line;
  while (csv_reader.HasMoreLines()) {
    // Fatal checks: a surplus line, a failed read or a wrong token count
    // would otherwise lead to out-of-range indexing below.
    ASSERT_LT(line_number, lines.size());
    ASSERT_TRUE(csv_reader.ReadLine(&tokenized_line));
    ASSERT_EQ(static_cast<size_t>(kDimension), tokenized_line.size());

    const Eigen::VectorXd& expected = lines[line_number];
    for (size_t ii = 0; ii < tokenized_line.size(); ++ii) {
      EXPECT_NEAR(expected(ii), std::stod(tokenized_line[ii]), 1e-4);
    }
    ++line_number;
  }

  // A truncated file must not pass: every written line has to come back.
  EXPECT_EQ(lines.size(), line_number);
}
int main(int argc, const char* argv[]) {
    try {
        // Parse command line arguments.
        TCLAP::CmdLine cmd("Depth RF trainer", ' ', "0.3");
        TCLAP::ValueArg<std::string> image_list_file_arg("f", "image-list-file", "File containing the names of image files", true, "", "string", cmd);
        TCLAP::ValueArg<int> num_of_classes_arg("n", "num-of-classes", "Number of classes in the data", true, 1, "int", cmd);
        TCLAP::SwitchArg print_confusion_matrix_switch("m", "conf-matrix", "Print confusion matrix", cmd, true);
        TCLAP::ValueArg<int> background_label_arg("l", "background-label", "Lower bound of background labels to be ignored", false, -1, "int", cmd);
        TCLAP::ValueArg<std::string> json_forest_file_arg("j", "json-forest-file", "JSON file where the trained forest should be saved", false, "forest.json", "string");
        TCLAP::ValueArg<std::string> binary_forest_file_arg("b", "binary-forest-file", "Binary file where the trained forest should be saved", false, "forest.bin", "string");
        TCLAP::ValueArg<std::string> config_file_arg("c", "config", "YAML file with training parameters", false, "", "string", cmd);
#if AIT_MULTI_THREADING
        TCLAP::ValueArg<int> num_of_threads_arg("t", "threads", "Number of threads to use", false, -1, "int", cmd);
#endif
        cmd.xorAdd(json_forest_file_arg, binary_forest_file_arg);
        cmd.parse(argc, argv);
        
        const int num_of_classes = num_of_classes_arg.getValue();
        bool print_confusion_matrix = print_confusion_matrix_switch.getValue();
        const std::string image_list_file = image_list_file_arg.getValue();

        // Initialize training and weak-learner parameters to defaults or load from file
        ForestTrainerT::ParametersT training_parameters;
        WeakLearnerT::ParametersT weak_learner_parameters;
        if (config_file_arg.isSet()) {
            ait::log_info(false) << "Reading config file " << config_file_arg.getValue() << "... " << std::flush;
            std::ifstream ifile_config(config_file_arg.getValue());
            cereal::JSONInputArchive iarchive(ifile_config);
            iarchive(cereal::make_nvp("training_parameters", training_parameters));
            iarchive(cereal::make_nvp("weak_learner_parameters", weak_learner_parameters));
            ait::log_info(false) << " Done." << std::endl;
        }
#if AIT_MULTI_THREADING
        if (num_of_threads_arg.isSet()) {
            training_parameters.num_of_threads = num_of_threads_arg.getValue();
        }
#endif

        // Read image file list
        ait::log_info(false) << "Reading image list ... " << std::flush;
        std::vector<std::tuple<std::string, std::string>> image_list;
        std::ifstream ifile(image_list_file);
        if (!ifile.good()) {
            throw std::runtime_error("Unable to open image list file");
        }
        ait::CSVReader<std::string> csv_reader(ifile);
        for (auto it = csv_reader.begin(); it != csv_reader.end(); ++it) {
            if (it->size() != 2) {
                cmd.getOutput()->usage(cmd);
                ait::log_error() << "Image list file should contain two columns with the data and label filenames.";
                exit(-1);
            }
            const std::string& data_filename = (*it)[0];
            const std::string& label_filename = (*it)[1];
            
            boost::filesystem::path data_path = boost::filesystem::path(data_filename);
            boost::filesystem::path label_path = boost::filesystem::path(label_filename);
            if (!data_path.is_absolute()) {
                data_path = boost::filesystem::path(image_list_file).parent_path();
                data_path /= data_filename;
            }
            if (!label_path.is_absolute()) {
                label_path = boost::filesystem::path(image_list_file).parent_path();
                label_path /= label_filename;
            }
            
            image_list.push_back(std::make_tuple(data_path.string(), label_path.string()));
        }
        ait::log_info(false) << " Done." << std::endl;
        
        // TODO: Ensure that label images do not contain values > num_of_classes except for background pixels. Other approach: Test samples directly below.
        
        // Set lower bound for background pixel lables
        ait::label_type background_label;
        if (background_label_arg.isSet()) {
            background_label = background_label_arg.getValue();
        } else {
            background_label = num_of_classes;
        }
        weak_learner_parameters.background_label = background_label;

        // Create weak learner and trainer.
        StatisticsT::Factory statistics_factory(num_of_classes);
        WeakLearnerT iwl(weak_learner_parameters, statistics_factory);
        ForestTrainerT trainer(iwl, training_parameters);
        SampleProviderT sample_provider(image_list, weak_learner_parameters);
        BaggingWrapperT bagging_wrapper(trainer, sample_provider);

#ifdef AIT_TESTING
        RandomEngineT rnd_engine(11);
#else
        std::random_device rnd_device;
        ait::log_info() << "rnd(): " << rnd_device();
        RandomEngineT rnd_engine(rnd_device());
#endif

        // Train a forest and time it.
        auto start_time = std::chrono::high_resolution_clock::now();
        // TODO
        //		ForestTrainerT::ForestT forest = bagging_wrapper.train_forest(rnd_engine);
        // TODO: Testing all samples for comparison with depth_trainer
        sample_provider.clear_samples();
        for (int i = 0; i < image_list.size(); ++i) {
            sample_provider.load_samples_from_image(i, rnd_engine);
        }
        SampleIteratorT samples_start = sample_provider.get_samples_begin();
        SampleIteratorT samples_end = sample_provider.get_samples_end();
        ait::log_info() << "Starting training ...";
        ForestTrainerT::ForestT forest = trainer.train_forest(samples_start, samples_end, rnd_engine);
        auto stop_time = std::chrono::high_resolution_clock::now();
        auto duration = stop_time - start_time;
        auto period = std::chrono::high_resolution_clock::period();
        double elapsed_seconds = duration.count() * period.num / static_cast<double>(period.den);
        ait::log_info() << "Done.";
        ait::log_info() << "Running time: " << elapsed_seconds;
        
        // Optionally: Serialize forest to JSON file.
        if (json_forest_file_arg.isSet()) {
            {
                ait::log_info(false) << "Writing json forest file " << json_forest_file_arg.getValue() << "... " << std::flush;
                std::ofstream ofile(json_forest_file_arg.getValue());
                cereal::JSONOutputArchive oarchive(ofile);
                oarchive(cereal::make_nvp("forest", forest));
                ait::log_info(false) << " Done." << std::endl;
            }
        // Optionally: Serialize forest to binary file.
        } else if (binary_forest_file_arg.isSet()) {
            {
                ait::log_info(false) << "Writing binary forest file " << binary_forest_file_arg.getValue() << "... " << std::flush;
                std::ofstream ofile(binary_forest_file_arg.getValue(), std::ios_base::binary);
                cereal::BinaryOutputArchive oarchive(ofile);
                oarchive(cereal::make_nvp("forest", forest));
                ait::log_info(false) << " Done." << std::endl;
            }
        } else {
            throw("This should never happen. Either a JSON or a binary forest file have to be specified!");
        }

        // Optionally: Compute some stats and print them.
        if (print_confusion_matrix) {
            ait::log_info(false) << "Creating samples for testing ... " << std::flush;
            sample_provider.clear_samples();
            for (int i = 0; i < image_list.size(); ++i) {
                sample_provider.load_samples_from_image(i, rnd_engine);
            }
            SampleIteratorT samples_start = sample_provider.get_samples_begin();
            SampleIteratorT samples_end = sample_provider.get_samples_end();
            ait::log_info(false) << " Done." << std::endl;
            
            std::vector<ait::size_type> sample_counts(num_of_classes, 0);
            for (auto sample_it = samples_start; sample_it != samples_end; sample_it++) {
                ++sample_counts[sample_it->get_label()];
            }
            auto logger = ait::log_info(true);
            logger << "Sample counts>> ";
            for (int c = 0; c < num_of_classes; ++c) {
                if (c > 0) {
                    logger << ", ";
                }
                logger << "class " << c << ": " << sample_counts[c];
            }
            logger.close();
            // For each tree extract leaf node indices for each sample.
            std::vector<std::vector<ait::size_type>> forest_leaf_indices = forest.evaluate(samples_start, samples_end);
            
            // Compute number of prediction matches based on a majority vote among the forest.
            int match = 0;
            int no_match = 0;
            for (auto tree_it = forest.cbegin(); tree_it != forest.cend(); ++tree_it) {
                for (auto sample_it = samples_start; sample_it != samples_end; sample_it++) {
                    const auto &node_it = tree_it->cbegin() + (forest_leaf_indices[tree_it - forest.cbegin()][sample_it - samples_start]);
                    const auto &statistics = node_it->get_statistics();
                    auto max_it = std::max_element(statistics.get_histogram().cbegin(), statistics.get_histogram().cend());
                    auto label = max_it - statistics.get_histogram().cbegin();
                    if (label == sample_it->get_label()) {
                        match++;
                    } else {
                        no_match++;
                    }
                }
            }
            ait::log_info() << "Match: " << match << ", no match: " << no_match;
            
            // Compute confusion matrix.
            auto forest_utils = ait::make_forest_utils(forest);
            auto confusion_matrix = forest_utils.compute_confusion_matrix(samples_start, samples_end);
            ait::log_info() << "Confusion matrix:" << std::endl << confusion_matrix;
            auto norm_confusion_matrix = ait::EvaluationUtils::normalize_confusion_matrix(confusion_matrix);
            ait::log_info() << "Normalized confusion matrix:" << std::endl << norm_confusion_matrix;
            ait::log_info() << "Diagonal of normalized confusion matrix:" << std::endl << norm_confusion_matrix.diagonal();
            
            // Computing per-frame confusion matrix
            ait::log_info() << "Computing per-frame confusion matrix.";
            using ConfusionMatrixType = typename decltype(forest_utils)::MatrixType;
            ConfusionMatrixType per_frame_confusion_matrix(num_of_classes, num_of_classes);
            per_frame_confusion_matrix.setZero();
            WeakLearnerT::ParametersT full_parameters(weak_learner_parameters);
            // Modify parameters to retrieve all pixels per sample
            full_parameters.samples_per_image_fraction = 1.0;
            SampleProviderT full_sample_provider(image_list, full_parameters);
            for (int i = 0; i < image_list.size(); ++i) {
                full_sample_provider.clear_samples();
                full_sample_provider.load_samples_from_image(i, rnd_engine);
                samples_start = full_sample_provider.get_samples_begin();
                samples_end = full_sample_provider.get_samples_end();
                forest_utils.update_confusion_matrix(per_frame_confusion_matrix, samples_start, samples_end);
            }
            ait::log_info() << "Per-frame confusion matrix:" << std::endl << per_frame_confusion_matrix;
            ConfusionMatrixType per_frame_norm_confusion_matrix = ait::EvaluationUtils::normalize_confusion_matrix(per_frame_confusion_matrix);
            ait::log_info() << "Normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix;
            ait::log_info() << "Diagonal of normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix.diagonal();
            ait::log_info() << "Mean of diagonal of normalized per-frame confusion matrix:" << std::endl << per_frame_norm_confusion_matrix.diagonal().mean();
        }

    } catch (const std::runtime_error& error) {
        std::cerr << "Runtime exception occured" << std::endl;
        std::cerr << error.what() << std::endl;
    }
    
    return 0;
}