예제 #1
0
/**
 * Command-line entry point for linear regression.
 *
 * Exactly one of --training_file or --input_model_file must be given:
 *  - With --training_file, a model is trained.  The responses come from
 *    --training_responses if that option is passed; otherwise the last row of
 *    the loaded training matrix is split off and used as the responses.  The
 *    trained model is saved if --output_model_file is passed.
 *  - With --input_model_file, the model is loaded, and --test_file is then
 *    required.
 * If --test_file is passed, predictions are computed for it and saved when
 * --output_predictions is passed.
 *
 * @param argc Number of command-line arguments.
 * @param argv Command-line arguments.
 */
int main(int argc, char* argv[])
{
  // Handle parameters.
  CLI::ParseCommandLine(argc, argv);

  const string inputModelFile = CLI::GetParam<string>("input_model_file");
  const string outputModelFile = CLI::GetParam<string>("output_model_file");
  const string outputPredictionsFile =
      CLI::GetParam<string>("output_predictions");
  const string trainingResponsesFile =
      CLI::GetParam<string>("training_responses");
  const string testFile = CLI::GetParam<string>("test_file");
  const string trainFile = CLI::GetParam<string>("training_file");
  const double lambda = CLI::GetParam<double>("lambda");

  mat regressors;
  mat responses;

  LinearRegression lr;

  // We want to determine if a training file XOR a model file was given.
  const bool hasTrainingFile = CLI::HasParam("training_file");
  const bool hasInputModel = CLI::HasParam("input_model_file");

  if (!hasTrainingFile && !hasInputModel)
  {
    Log::Fatal << "You must specify either --training_file or "
        << "--input_model_file." << endl;
  }
  else if (hasTrainingFile && hasInputModel)
  {
    // Both were given.  This is ambiguous -- which model should we use?  A
    // generated one or the given one?  Report error and exit.
    Log::Fatal << "You must specify either --training_file or "
        << "--input_model_file, not both." << endl;
  }

  // A model is trained only when a training file (and no model) was given.
  const bool computeModel = hasTrainingFile && !hasInputModel;

  if (CLI::HasParam("test_file") && !CLI::HasParam("output_predictions"))
    Log::Warn << "--test_file (-t) specified, but --output_predictions "
        << "(-o) is not; no results will be saved." << endl;

  // If they specified a model file, we also need a test file or we
  // have nothing to do.
  if (!computeModel && !CLI::HasParam("test_file"))
  {
    Log::Fatal << "When specifying --input_model_file, you must also specify "
        << "--test_file." << endl;
  }

  if (!computeModel && CLI::HasParam("lambda"))
  {
    Log::Warn << "--lambda ignored because no model is being trained." << endl;
  }

  // A training file was given and we need to generate the model.
  if (computeModel)
  {
    Timer::Start("load_regressors");
    data::Load(trainFile, regressors, true);
    Timer::Stop("load_regressors");

    // Are the responses in a separate file?
    if (!CLI::HasParam("training_responses"))
    {
      // No: the responses are the last row of the training matrix.  We need at
      // least one feature row in addition to the response row.
      if (regressors.n_rows < 2)
      {
        Log::Fatal << "Can't get responses from training data "
            "since it has less than 2 rows." << endl;
      }

      // The initial predictors for y, Nx1.
      responses = trans(regressors.row(regressors.n_rows - 1));
      regressors.shed_row(regressors.n_rows - 1);
    }
    else
    {
      // Yes: load the responses from their own file.
      Timer::Start("load_responses");
      data::Load(trainingResponsesFile, responses, true);
      Timer::Stop("load_responses");

      if (responses.n_rows == 1)
        responses = trans(responses); // Probably loaded backwards.

      if (responses.n_cols > 1)
        Log::Fatal << "The responses must have one column.\n";

      if (responses.n_rows != regressors.n_cols)
        Log::Fatal << "The responses must have the same number of rows as the "
            "training file.\n";
    }

    // Pass lambda to the constructor: assigning a freshly constructed model to
    // lr would otherwise discard any regularization value set beforehand.
    Timer::Start("regression");
    lr = LinearRegression(regressors, responses.unsafe_col(0), lambda);
    Timer::Stop("regression");

    // Save the parameters.
    if (CLI::HasParam("output_model_file"))
      data::Save(outputModelFile, "linearRegressionModel", lr);
  }

  // Did we want to predict, too?
  if (CLI::HasParam("test_file"))
  {
    // A model file was passed in, so load it.
    if (!computeModel)
    {
      Timer::Start("load_model");
      data::Load(inputModelFile, "linearRegressionModel", lr, true);
      Timer::Stop("load_model");
    }

    // Load the test file data.
    arma::mat points;
    Timer::Start("load_test_points");
    data::Load(testFile, points, true);
    Timer::Stop("load_test_points");

    // Ensure that test file data has the right number of features.  The
    // comparison below treats one model parameter as not belonging to a
    // feature (presumably the intercept term -- confirm against the model).
    if ((lr.Parameters().n_elem - 1) != points.n_rows)
    {
      Log::Fatal << "The model was trained on " << lr.Parameters().n_elem - 1
          << "-dimensional data, but the test points in '" << testFile
          << "' are " << points.n_rows << "-dimensional!" << endl;
    }

    // Perform the predictions using our model.
    arma::vec predictions;
    Timer::Start("prediction");
    lr.Predict(points, predictions);
    Timer::Stop("prediction");

    // Save predictions.
    if (CLI::HasParam("output_predictions"))
      data::Save(outputPredictionsFile, predictions, true, false);
  }
}
예제 #2
0
/**
 * Binding entry point for linear regression.
 *
 * Exactly one of "training" or "input_model" must be passed.  With "training",
 * a model is fit using "lambda"; the responses are taken from
 * "training_responses" if passed, otherwise from the last row of the training
 * matrix.  With "input_model", the model is taken from that parameter and
 * "test" is required.  If "test" is passed, predictions are computed and
 * stored in "output_predictions" when that parameter is passed.  The model is
 * stored in "output_model" when that parameter is passed.
 */
static void mlpackMain()
{
  const double lambda = CLI::GetParam<double>("lambda");

  RequireOnlyOnePassed({ "training", "input_model" }, true);

  // Predictions are only produced when a test set is given (see below), so
  // "output_predictions" is ignored when "test" is NOT passed.
  ReportIgnoredParam({{ "test", false }}, "output_predictions");

  mat regressors;
  rowvec responses;

  LinearRegression lr;

  const bool computeModel = !CLI::HasParam("input_model");
  const bool computePrediction = CLI::HasParam("test");

  // If they specified a model file, we also need a test file or we
  // have nothing to do.
  if (!computeModel)
  {
    RequireAtLeastOnePassed({ "test" }, true, "test points must be specified "
        "when an input model is given");
  }

  // No training happens when a model is given, so lambda has no effect then.
  ReportIgnoredParam({{ "input_model", true }}, "lambda");

  RequireAtLeastOnePassed({ "output_model", "output_predictions" }, false,
      "no output will be saved");

  // A training set was given and we need to generate the model.
  if (computeModel)
  {
    Timer::Start("load_regressors");
    regressors = std::move(CLI::GetParam<mat>("training"));
    Timer::Stop("load_regressors");

    // Are the responses in a separate file?
    if (!CLI::HasParam("training_responses"))
    {
      // No: the responses are the last row of the training matrix, so there
      // must be at least one feature row in addition to it.
      if (regressors.n_rows < 2)
      {
        Log::Fatal << "Can't get responses from training data "
            "since it has less than 2 rows." << endl;
      }
      responses = regressors.row(regressors.n_rows - 1);
      regressors.shed_row(regressors.n_rows - 1);
    }
    else
    {
      // Yes: take the responses from their own parameter.
      Timer::Start("load_responses");
      responses = CLI::GetParam<rowvec>("training_responses");
      Timer::Stop("load_responses");

      // One response is needed per training point (column).
      if (responses.n_cols != regressors.n_cols)
      {
        Log::Fatal << "The responses must have the same number of columns "
            "as the training set." << endl;
      }
    }

    Timer::Start("regression");
    lr = LinearRegression(regressors, responses, lambda);
    Timer::Stop("regression");
  }
  else
  {
    // A model file was passed in, so load it.
    Timer::Start("load_model");
    lr = std::move(CLI::GetParam<LinearRegression>("input_model"));
    Timer::Stop("load_model");
  }

  // Did we want to predict, too?
  if (computePrediction)
  {
    // Load the test file data.
    Timer::Start("load_test_points");
    mat points = std::move(CLI::GetParam<mat>("test"));
    Timer::Stop("load_test_points");

    // Ensure that test file data has the right number of features.  The
    // comparison below treats one model parameter as not belonging to a
    // feature (presumably the intercept term -- confirm against the model).
    if ((lr.Parameters().n_elem - 1) != points.n_rows)
    {
      Log::Fatal << "The model was trained on " << lr.Parameters().n_elem - 1
          << "-dimensional data, but the test points in '"
          << CLI::GetPrintableParam<mat>("test") << "' are " << points.n_rows
          << "-dimensional!" << endl;
    }

    // Perform the predictions using our model.
    rowvec predictions;
    Timer::Start("prediction");
    lr.Predict(points, predictions);
    Timer::Stop("prediction");

    // Save predictions.
    if (CLI::HasParam("output_predictions"))
      CLI::GetParam<rowvec>("output_predictions") = std::move(predictions);
  }

  // Save the model if needed.
  if (CLI::HasParam("output_model"))
    CLI::GetParam<LinearRegression>("output_model") = std::move(lr);
}