Beispiel #1
0
int main() {
    LinearRegression lr;
    vector<DataGroup> train_set;

    DataGroup train1, train2, train3, train4, train5;
    //y = 3 * x1 - 5 * x2 + 3;
    DataGroup test1;
    //1  1 1
    train1.in.push_back(1); train1.in.push_back(1); train1.out.push_back(1);
    //1 -1 1
    train2.in.push_back(2); train2.in.push_back(1); train2.out.push_back(4);
    //-1 1 1
    train3.in.push_back(-1); train3.in.push_back(3); train3.out.push_back(-15);
    //0  0 1
    train4.in.push_back(9); train4.in.push_back(1); train4.out.push_back(25);
    //-1 -1 0
    train5.in.push_back(0); train5.in.push_back(1); train5.out.push_back(-2);
    train_set.push_back(train1);
    train_set.push_back(train2);
    train_set.push_back(train3);
    train_set.push_back(train4);
    train_set.push_back(train5);
    lr.train(train_set, 1e-5);

    test1.in.push_back(1); test1.in.push_back(1); test1.out.push_back(0);
    lr.predict(test1);
    cout << "result:" << test1.out[0] << endl;
    return 0;
}
// Both x values are 0.0 while the y values differ; setData() is expected to
// reject this input by throwing std::logic_error.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, zeroData ) {
  Array<double> x;
  Array<double> y;
  for (int k = 0; k < 2; ++k) {
    x.push_back(0.0);
  }
  y.push_back(1.0);
  y.push_back(2.0);

  LinearRegression<double> lr;
  TEST_THROW(lr.setData(x,y), std::logic_error);
}
// Two points, (0,0) and (1,1): the fitted line must have slope exactly 1 and
// y-intercept exactly 0.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, trivialData ) {
  Array<double> x;
  Array<double> y;
  x.push_back(0.0);
  y.push_back(0.0);
  x.push_back(1.0);
  y.push_back(1.0);

  LinearRegression<double> lr;
  lr.setData(x,y);
  TEST_EQUALITY_CONST( lr.getSlope(), 1.0 );
  TEST_EQUALITY_CONST( lr.getYIntercept(), 0.0 );
}
int main(int argc, char* argv[]) {
  LinearRegression lr;
  LinearRegressionScoreTest lrst;
  LinearRegressionPermutationTest lrpt;

  Vector y;
  Matrix x;

  LoadVector("input.y", y);
  LoadMatrix("input.x", x);

  if (lr.FitLinearModel(x, y) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }

  Vector& beta = lr.GetCovEst();
  Matrix& v = lr.GetCovB();
  Vector& pWald = lr.GetAsyPvalue();

  fprintf(stdout, "wald_beta\t");
  Print(beta);
  fputc('\n', stdout);

  fprintf(stdout, "wald_vcov\t");
  Print(v);
  fputc('\n', stdout);

  fprintf(stdout, "wald_p\t");
  Print(pWald[1]);
  fputc('\n', stdout);

  if (lrpt.FitLinearModel(x, 1, y, 200, 0.05) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }

  fprintf(stdout, "permutation_p\t");
  double permu_p = lrpt.getPvalue();
  Print(permu_p);
  fputc('\n', stdout);

  if (lrst.FitLinearModel(x, y, 1) == false) {
    fprintf(stderr, "Fitting failed!\n");
    return -1;
  }

  fprintf(stdout, "score_p\t");
  double score_p = lrst.GetPvalue();
  Print(score_p);
  fputc('\n', stdout);

  return 0;
};
int main(int argc, char** argv) {
  QApplication app(argc, argv);
  std::vector<Eigen::Matrix<double, 2, 1> > data;
  LinearRegression<2> dist;
  for (double x = -10; x < 10; x += 0.01) {
    dist.setBasis((Eigen::Matrix<double, 1, 1>() << x).finished());
    data.push_back(dist.getSample());
  }
  ScatterPlot<2> plot("LinearRegressionRndScatterPlot1v", data);
  plot.show();
  return app.exec();
}
int main(int argc, char** argv) {
  QApplication app(argc, argv);
  std::vector<Eigen::Matrix<double, 3, 1> > data;
  LinearRegression<3> dist;
  for (double x = -10; x < 10; x += 0.1)
    for (double y = -10; y < 10; y += 0.1) {
      dist.setBasis(Eigen::Matrix<double, 2, 1>(x, y));
      data.push_back(dist.getSample());
     }
  ScatterPlot<3> plot("LinearRegressionRndScatterPlot2v", data);
  plot.show();
  return app.exec();
}
// Three points where x = 1.0 appears twice: (0,0), (1,0.5), (1,1.5).
// The least-squares line through them has slope 1 and y-intercept 0, which
// is checked to within a tolerance of 1e-10.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, nonUniqueXData ) {
  Array<double> x;
  Array<double> y;
  x.push_back(0.0);
  x.push_back(1.0);
  x.push_back(1.0);
  y.push_back(0.0);
  y.push_back(0.5);
  y.push_back(1.5);

  LinearRegression<double> lr;
  lr.setData(x,y);

  const double tol = 1.0e-10;
  TEST_FLOATING_EQUALITY( lr.getSlope(), 1.0, tol );
  TEST_FLOATING_EQUALITY( lr.getYIntercept(), 0.0, tol );
}
// Fits a line to cos(x) sampled at x = 0.0, 0.1, ..., 1.0 (11 points) and
// compares slope and y-intercept against reference values from matlab.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, CoSineData ) {
  const int N = 11;
  const double dt = 0.1;

  Array<double> x;
  Array<double> y;
  for (int i = 0; i != N; ++i) {
    // Each abscissa is computed as dt*i rather than accumulated.
    const double xval = dt*i;
    x.push_back( xval );
    y.push_back( cos(xval) );
  }

  LinearRegression<double> lr;
  lr.setData(x,y);

  // 1.0e-14 works on rancilio but not on gabriel, exetazo, or s858352
  const double tol = 1.0e-13;
  // Reference values came from matlab.
  TEST_FLOATING_EQUALITY( lr.getSlope(), -4.653508042678562e-01, tol );
  TEST_FLOATING_EQUALITY( lr.getYIntercept(), 1.067025181571952, tol );
}
// Fits a line to sin(x) sampled at x = 0.0, 0.1, ..., 1.0 (11 points) and
// compares slope and y-intercept against reference values from matlab.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, SineData ) {
  const int N = 11;
  const double dt = 0.1;

  Array<double> x;
  Array<double> y;
  for (int i = 0; i != N; ++i) {
    // Each abscissa is computed as dt*i rather than accumulated.
    const double xval = dt*i;
    x.push_back( xval );
    y.push_back( sin(xval) );
  }

  LinearRegression<double> lr;
  lr.setData(x,y);

  // 1.0e-14 works on rancilio but not on gabriel, exetazo, or s858352
  const double tol = 1.0e-13;
  // Reference values came from matlab.
  TEST_FLOATING_EQUALITY( lr.getSlope(), 8.518189335013251e-01, tol );
  TEST_FLOATING_EQUALITY( lr.getYIntercept(), 2.989789515694744e-02, tol );
}
/**
 * Scores a saved linear model against the normalised "simulation" data set of
 * one year and prints the classification accuracy.
 *
 * Steps:
 *   1. Build the model from "LDA_result/<year>LDA_model.csv".
 *   2. Load the test points, predict, and save the predictions to
 *      "LDA_result/<year>LDA_prediction.csv".
 *   3. Read the labels and the saved predictions back via read_line(), map
 *      every prediction to +1 (value > 0) or -1 (otherwise), and print
 *      "<matches>/<total> <rate>" to stdout.
 *
 * If the number of predictions and labels differ, or there are no
 * predictions at all, a diagnostic is written to stderr and no accuracy line
 * is printed (previously this silently printed "0/N 0", or a NaN rate for an
 * empty file).
 *
 * @param year  Prefix/sub-directory used to build all input and output paths.
 */
void LDA_predict(const std::string &year) {
	string type = "simulation";
	string modelFile = "LDA_result/" + year + "LDA_model.csv";
	string testFile = "training_set/" + type + "/" + year + "/" + year + "training_set_normalise" + ".csv";
	string label_file = "training_set/" + type + "/" + year + "/" + year + "label_set" + ".csv";

	LinearRegression lr;
	lr = LinearRegression(modelFile);

	// Load the test points.
	arma::mat points;
	data::Load(testFile, points, true);

	// Perform the predictions using our model.
	arma::vec predictions;
	lr.Predict(points, predictions);

	// Save predictions.
	string predictFile = "LDA_result/" + year + "LDA_prediction.csv";
	data::Save(predictFile, predictions, true);

	// NOTE(review): the predictions are re-read from disk as ints; whether
	// read_line() keeps the sign of fractional values is not visible here --
	// confirm, or threshold `predictions` directly.
	const std::vector<int> labels = read_line(label_file);
	const std::vector<int> p = read_line(predictFile);

	if (p.size() != labels.size()) {
		std::cerr << "LDA_predict: " << p.size() << " predictions but "
		          << labels.size() << " labels for " << year
		          << "; accuracy not computed" << endl;
		return;
	}
	if (p.empty()) {
		// Avoid the 0/0 division below.
		std::cerr << "LDA_predict: no predictions for " << year
		          << "; accuracy not computed" << endl;
		return;
	}

	int num = 0;
	for (std::vector<int>::size_type i = 0; i < p.size(); ++i) {
		// Threshold the raw prediction into the {-1, +1} label space.
		const int predicted = (p[i] > 0) ? 1 : -1;
		if (predicted == labels[i]) {
			++num;
		}
	}
	const double rate = static_cast<double>(num) / static_cast<double>(p.size());
	std::cout<<num<<"/"<<p.size()<<" "<<rate<<endl;
}
Beispiel #11
0
void ClothoidPath::OptimiseLine( const CarModel& cm, int idx, int step,	double hLimit, PathPt* l3, const PathPt* l2, const PathPt* l4 )
{
	LinearRegression	l;

	const int NSEG = m_pTrack->GetSize();

	int i = (idx + NSEG - step) % NSEG;
	while( m_pPath[i].h > hLimit )
	{
		l.Sample( m_pPath[i].pt.GetXY() );
		i = (i + NSEG - step) % NSEG;
	}

	l.Sample( m_pPath[i].pt.GetXY() );

	i = idx;
	while( m_pPath[i].h > hLimit )
	{
		l.Sample( m_pPath[i].pt.GetXY() );
		i = (i + step) % NSEG;
	}

	l.Sample( m_pPath[i].pt.GetXY() );

    GfOut( "%4d  ", idx );
	Vec2d	p, v;
	l.CalcLine( p, v );

	double	t;
	Utils::LineCrossesLine( l3->Pt().GetXY(), l3->Norm().GetXY(), p, v, t );

	SetOffset( cm, 0, t, l3, l2, l4 );
}
// Convenience wrapper: feeds (x, y) to a fresh LinearRegression<Scalar> and
// returns the slope it reports.
Scalar computeLinearRegressionSlope(Array<Scalar>& x, Array<Scalar>& y) 
{
  LinearRegression<Scalar> regression;
  regression.setData(x, y);
  return regression.getSlope();
}
  /*
       Computes the linear regression for a series of measurements, the
       x-axis intercept of the regression line and its confidence interval, and
       writes a couple of files from which a nice plot of all this can be
       generated using the gnuplot program.
  */
  bool computeRegressionAndWriteGnuplotFiles_(vector<double>::const_iterator const conc_vec_begin,
                                              vector<double>::const_iterator const conc_vec_end,
                                              vector<double>::const_iterator const area_vec_begin,
                                              double const confidence_p,
                                              String const filename_prefix,
                                              String const output_filename,
                                              String const format = "",
                                              bool const write_gnuplot = true
                                              )
  {

    try
    {
      LinearRegression linreg;
      linreg.computeRegression(confidence_p, conc_vec_begin, conc_vec_end, area_vec_begin);

      if (write_gnuplot)
      {

        // the peak data goes here
        String datafilename(filename_prefix);
        datafilename += String(".dat");
        ofstream dataout(datafilename.c_str());

        // the gnuplot commands go here
        String commandfilename(filename_prefix);
        commandfilename += String(".cmd");
        ofstream cmdout(commandfilename.c_str());

        // the error bar for the x-axis intercept goes here
        String errorbarfilename(filename_prefix);
        errorbarfilename += String(".err");
        ofstream errout(errorbarfilename.c_str());

        // writing the commands
        cmdout <<
        "set ylabel \"ion count\"\n"
        "set xlabel \"concentration\"\n"
        "set key left Left reverse\n";

        if (!format.empty())
        {
          if (format == "png")
          {
            cmdout <<
            "set terminal png \n"
            "set output \"" << filename_prefix << ".png\"\n";
          }
          else if (format == "eps")
          {
            cmdout <<
            "set terminal postscript eps \n"
            "set output \"" << filename_prefix << ".eps\"\n";
          }

        }
        cmdout <<
        "plot \""  << datafilename << "\"  w points ps 2 pt 1 lt 8 title \"data\" "            // want data on first line of key
                                      ",  " << linreg.getIntercept() << "+" <<  linreg.getSlope() << "*x lt 2 lw 3 title \"linear regression: "
        << linreg.getIntercept() << " + " <<  linreg.getSlope() << " * x\" "
                                                                   ", \""  << datafilename << "\"  w points ps 2 pt 1 lt 8 notitle " // draw data a second time, on top of reg. line
                                                                                              ", \"" << errorbarfilename << "\"  using ($1):(0) w points pt 13 ps 2 lt 1 title \"x-intercept: " << linreg.getXIntercept() << "\" "
                                                                                                                                                                                                                             ", \"" << errorbarfilename << "\"  w xerrorbars lw 3 lt 1 title \"95% interval: [ " << linreg.getLower() << ", " << linreg.getUpper() << " ]\"\n";
        cmdout.close();

        // writing the x-axis intercept error bar
        errout << linreg.getXIntercept() << " 0 " << linreg.getLower() << " " << linreg.getUpper() << endl;
        errout.close();

        // writing the peak data points
        vector<double>::const_iterator cit = conc_vec_begin;
        vector<double>::const_iterator ait = area_vec_begin;
        dataout.precision(writtenDigits<double>(0.0));
        for (; cit != conc_vec_end; ++cit, ++ait)
        {
          dataout << *cit << ' ' << *ait << '\n';
        }
        dataout.close();

      }       // end if (write_gnuplot)

      // write results to XML file
      ofstream results;
      results.open(output_filename.c_str());

      results << "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>" << endl;
      results << "<results_additiveseries>" << endl;
      results << "\t<slope>" << linreg.getSlope() << "</slope>" << endl;
      results << "\t<intercept>" << linreg.getIntercept() << "</intercept>" << endl;
      results << "\t<x_intercept>" << linreg.getXIntercept() << "</x_intercept>" << endl;
      results << "\t<confidence_lowerlimit>" << linreg.getLower() << "</confidence_lowerlimit>" << endl;
      results << "\t<confidence_upperlimit>" << linreg.getUpper() << "</confidence_upperlimit>" << endl;
      results << "\t<pearson_squared>" << linreg.getRSquared() << "</pearson_squared>" << endl;
      results << "\t<std_residuals>" << linreg.getStandDevRes() << "</std_residuals>" << endl;
      results << "\t<t_statistic>" << linreg.getTValue() << "</t_statistic>" << endl;
      results << "</results_additiveseries>" << endl;

      results.close();
    }
    catch (string & s)
    {
      cout << s <<  endl;
      return 1;
    }

    return 0;
  }
Beispiel #14
0
void test_linearity() {
	Radix r;
	SimpleLSystemWithBranching sls;

	int b_index = 3;
	int grid_size = 10;
	//int grid_size = 5;

	int N = 10000;
	cv::Mat_<double> X(N, (grid_size - b_index - 2) * 2 + b_index);
	//cv::Mat_<double> X(N, (grid_size - 3) * 2 + 1);
	cv::Mat_<double> Y(N, grid_size * grid_size);
	
	for (int i = 0; i < N; ++i) {
		cv::Mat_<double> param(1, X.cols);
		for (int c = 0; c < X.cols; ++c) {
			param(0, c) = rand() % 3 - 1;
		}
		param.copyTo(X.row(i));
	}

	cv::Mat_<double> X2(X.rows, X.cols);

	int count = 0;
	for (int i = 0; i < X.rows; ++i) {
		try {
			//cv::Mat_<double> density = sls.computeDensity(grid_size, X.row(i), true, true);
			cv::Mat_<double> density = sls.computeDensity(grid_size, X.row(i), true, false);
			density.copyTo(Y.row(count));
			X.row(i).copyTo(X2.row(count));
			count++;
		} catch (char* ex) {
			//cout << "conflict" << endl;
		}
			
	}

	ml::saveDataset("dataX.txt", X2(cv::Rect(0, 0, X2.cols, count)));
	ml::saveDataset("dataY.txt", Y(cv::Rect(0, 0, Y.cols, count)));


	//cv::Mat_<double> X, Y;
	ml::loadDataset("dataX.txt", X);
	ml::loadDataset("dataY.txt", Y);

	cv::Mat_<double> trainX, trainY;
	cv::Mat_<double> testX, testY;
	ml::splitDataset(X, 0.8, trainX, testX);
	ml::splitDataset(Y, 0.8, trainY, testY);

	// Forward
	{
		LinearRegression lr;
		lr.train(trainX, trainY);
		cv::Mat_<double> Y_hat = lr.predict(testX);

		cv::Mat_<double> Y_avg;
		cv::reduce(trainY, Y_avg, 0, CV_REDUCE_AVG);
		Y_avg = cv::repeat(Y_avg, testY.rows, 1);

		cout << "-----------------------" << endl;
		cout << "Forward:" << endl;
		cout << "RMSE: " << ml::rmse(testY, Y_hat, true) << endl;
		cout << "Baselime: " << ml::rmse(testY, Y_avg, true) << endl;
	}

	// Inverse
	{
		LinearRegression lr;
		lr.train(trainY, trainX);
		
		cv::Mat_<double> X_hat = lr.predict(testY);

		// Xの各値を-1,0,1にdiscretizeする
		{
			for (int r = 0; r < X_hat.rows; ++r) {
				for (int c = 0; c < X_hat.cols; ++c) {
					if (X_hat(r, c) < -0.5) {
						X_hat(r, c) = -1;
					} else if (X_hat(r,c ) > 0.5) {
						X_hat(r, c) = 1;
					} else {
						X_hat(r, c) = 0;
					}
				}
			}
		}
		
		for (int i = 0; i < testX.cols; ++i) {
			cout << ml::rmse(testX.col(i), X_hat.col(i), true) << endl;
		}


		cv::Mat_<double> Y_hat(testY.rows, testY.cols);
		for (int i = 0; i < testX.rows; ++i) {
			cv::Mat_<double> density_hat = sls.computeDensity(grid_size, X_hat.row(i), true, false);
			density_hat.copyTo(Y_hat.row(i));
		}

		cv::Mat X_avg;
		cv::reduce(trainX, X_avg, 0, CV_REDUCE_AVG);
		double baselineX = ml::rmse(testX, cv::repeat(X_avg, testX.rows, 1), true);

		cv::Mat Y_avg = sls.computeDensity(grid_size, X_avg, true, false);
		//cv::reduce(trainY, Y_avg, 0, CV_REDUCE_AVG);
		double baselineY = ml::rmse(testY, cv::repeat(Y_avg, testY.rows, 1), true);

		cout << "-----------------------" << endl;
		cout << "Inverse:" << endl;
		cout << "RMSE in Parameter: " << ml::rmse(testX, X_hat, true) << endl;
		cout << "Baselime: " << baselineX << endl;
		cout << "RMSE in Indicator: " << ml::rmse(testY, Y_hat, true) << endl;
		cout << "Baseline: " << baselineY << endl;
	}
}
Beispiel #15
0
// Self-test for LinearRegression<2>.
//
// Prints the default state, applies new coefficients / variance / basis and
// verifies that each getter returns what was set, prints a few derived
// quantities, and finally checks that a copy-constructed and an assigned
// instance carry the same coefficients and variance as the source.
// Returns 0 if every check passes and 1 on the first mismatch.
int main(int argc, char** argv) {
  LinearRegression<2> linearRegression;
  std::cout << "Default parameters: " << std::endl << linearRegression
    << std::endl;
  std::cout << "l.getLinearBasisFunction(): " << std::endl <<
    linearRegression.getLinearBasisFunction() << std::endl;
  std::cout << "l.getVariance(): " << linearRegression.getVariance()
    << std::endl;
  std::cout <<  "l.setLinearBasisFunction(2.0, 2.0)" << std::endl;
  std::cout << "l.setVariance(2.0)" << std::endl;
  std::cout << "l.setBasis(2.0)" << std::endl;
  linearRegression.setLinearBasisFunction(LinearBasisFunction<double, 2>(
    Eigen::Matrix<double, 2, 1>(2.0, 2.0)));
  linearRegression.setVariance(2.0);
  linearRegression.setBasis((Eigen::Matrix<double, 1, 1>() << 2.0).finished());

  // Every setter must be reflected by its getter.
  if (linearRegression.getLinearBasisFunction().getCoefficients() !=
    Eigen::Matrix<double, 2, 1>(2.0, 2.0))
    return 1;
  if (linearRegression.getVariance() != 2.0)
    return 1;
  if (linearRegression.getBasis() !=
      (Eigen::Matrix<double, 1, 1>() << 2.0).finished())
    return 1;
  std::cout << "New parameters: " << std::endl << linearRegression
    << std::endl;
  std::cout << "l.getValue(2): " << linearRegression(
    Eigen::Matrix<double, 2, 1>(2, 6)) << std::endl;
  std::cout << "l.getSample(): " << std::endl << linearRegression.getSample()
    << std::endl;
  std::cout << "l.getMean(): " << linearRegression.getMean() << std::endl;

  // Copy constructor.
  LinearRegression<2> linCopy(linearRegression);
  std::cout << "Copy constructor: " << std::endl << linCopy << std::endl
    << std::endl;
  if (linCopy.getLinearBasisFunction().getCoefficients() !=
      linearRegression.getLinearBasisFunction().getCoefficients())
    return 1;
  if (linCopy.getVariance() != linearRegression.getVariance())
    return 1;

  // Assignment operator. The target is default-constructed first and then
  // assigned: `T a = b;` is copy-initialization and would call the copy
  // constructor again, leaving operator= untested.
  LinearRegression<2> linAssign;
  linAssign = linearRegression;
  std::cout << "Assignment operator: " << std::endl << linAssign << std::endl;
  if (linAssign.getLinearBasisFunction().getCoefficients() !=
      linearRegression.getLinearBasisFunction().getCoefficients())
    return 1;
  if (linAssign.getVariance() != linearRegression.getVariance())
    return 1;

  return 0;
}
Beispiel #16
0
int DataLoader::useResidualAsPhenotype() {
  if (binaryPhenotype) {
    logger->warn(
        "WARNING: Skip transforming binary phenotype, although you want to "
        "use residual as phenotype!");
    return 0;
  }

  LinearRegression lr;
  Vector pheno;
  Matrix covAndInt;
  const int numCovariate = covariate.ncol();

  copyPhenotype(phenotype, &pheno);
  copyCovariateAndIntercept(pheno.Length(), covariate, &covAndInt);
  if (!lr.FitLinearModel(covAndInt, pheno)) {
    if (numCovariate > 0) {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the "
          "original phenotype");
    } else {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 ], now use the "
          "original phenotype");
    }
  } else {  // linear model fitted successfully
    copyVectorToMatrixColumn(lr.GetResiduals(), &phenotype, 0);
    // const int n = lr.GetResiduals().Length();
    // for (int i = 0; i < n; ++i) {
    //   // phenotypeInOrder[i] = lr.GetResiduals()[i];
    //   phenotype[i][0] = lr.GetResiduals()[i];
    // }
    covariate.clear();
    if (numCovariate > 0) {
      logger->info(
          "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
          "residuals will be used as responses");
    } else {
      logger->info("DONE: Use residual as phenotype by centerng it");
    }

    // store fitting results
    Vector& beta = lr.GetCovEst();
    Matrix& betaSd = lr.GetCovB();
    const int n = beta.Length();
    for (int i = 0; i < n; ++i) {
      addFittedParameter(covAndInt.GetColumnLabel(i), beta[i], betaSd[i][i]);
    }
    addFittedParameter("Sigma2", lr.GetSigma2(), NAN);
  }

#if 0
  if (covariate.ncol() > 0) {
    LinearRegression lr;
    Vector pheno;
    Matrix covAndInt;
    copyPhenotype(phenotype, &pheno);
    copyCovariateAndIntercept(covariate.nrow(), covariate, &covAndInt);
    if (!lr.FitLinearModel(covAndInt, pheno)) {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the "
          "original phenotype");
    } else {
      const int n = lr.GetResiduals().Length();
      for (int i = 0; i < n; ++i) {
        // phenotypeInOrder[i] = lr.GetResiduals()[i];
        phenotype[i][0] = lr.GetResiduals()[i];
      }
      covariate.clear();
      logger->info(
          "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
          "residuals will be used as responses");
    }
    storeFittedModel(lr);
  } else {  // no covaraites
    // centerVector(&phenotypeInOrder);
    std::vector<double> v;
    phenotype.extractCol(0, &v);
    centerVector(&v);
    phenotype.setCol(0, v);

    logger->info("DONE: Use residual as phenotype by centerng it");
  }
#endif

  return 0;
}
/**
 * Entry point of the linear regression program.
 *
 * Exactly one of "training" (fit a new model) or "input_model" (reuse a
 * saved model) must be given. When training, the responses are taken from
 * "training_responses" if passed, otherwise from the last row of the
 * training matrix. If "test" is passed, predictions for the test points are
 * computed and stored in "output_predictions" when requested. The model is
 * stored in "output_model" when requested.
 */
static void mlpackMain()
{
  const double lambda = CLI::GetParam<double>("lambda");

  RequireOnlyOnePassed({ "training", "input_model" }, true);

  // Predictions are only produced when test points are given (see the
  // computePrediction branch below), so "output_predictions" is ignored when
  // "test" is NOT passed. The constraint previously read { "test", true },
  // which is the opposite polarity from the "input_model"/"lambda" check
  // further down.
  ReportIgnoredParam({{ "test", false }}, "output_predictions");

  mat regressors;
  rowvec responses;

  LinearRegression lr;

  const bool computeModel = !CLI::HasParam("input_model");
  const bool computePrediction = CLI::HasParam("test");

  // If they specified a model file, we also need a test file or we
  // have nothing to do.
  if (!computeModel)
  {
    RequireAtLeastOnePassed({ "test" }, true, "test points must be specified "
        "when an input model is given");
  }

  // lambda only matters while training; with a loaded model it is unused.
  ReportIgnoredParam({{ "input_model", true }}, "lambda");

  RequireAtLeastOnePassed({ "output_model", "output_predictions" }, false,
      "no output will be saved");

  // An input file was given and we need to generate the model.
  if (computeModel)
  {
    Timer::Start("load_regressors");
    regressors = std::move(CLI::GetParam<mat>("training"));
    Timer::Stop("load_regressors");

    // Are the responses in a separate file?
    if (!CLI::HasParam("training_responses"))
    {
      // No: the last row of the training matrix holds the responses, so at
      // least one feature row plus the response row is required.
      if (regressors.n_rows < 2)
      {
        Log::Fatal << "Can't get responses from training data "
            "since it has less than 2 rows." << endl;
      }
      responses = regressors.row(regressors.n_rows - 1);
      regressors.shed_row(regressors.n_rows - 1);
    }
    else
    {
      // Yes: take them from the separate parameter.
      Timer::Start("load_responses");
      responses = CLI::GetParam<rowvec>("training_responses");
      Timer::Stop("load_responses");

      // One response per training point (points are columns).
      if (responses.n_cols != regressors.n_cols)
      {
        Log::Fatal << "The responses must have the same number of columns "
            "as the training set." << endl;
      }
    }

    Timer::Start("regression");
    lr = LinearRegression(regressors, responses, lambda);
    Timer::Stop("regression");
  }
  else
  {
    // A model file was passed in, so load it.
    Timer::Start("load_model");
    lr = std::move(CLI::GetParam<LinearRegression>("input_model"));
    Timer::Stop("load_model");
  }

  // Did we want to predict, too?
  if (computePrediction)
  {
    // Load the test file data.
    Timer::Start("load_test_points");
    mat points = std::move(CLI::GetParam<mat>("test"));
    Timer::Stop("load_test_points");

    // Ensure that test file data has the right number of features; the
    // parameter vector holds one more element than the data dimensionality.
    if ((lr.Parameters().n_elem - 1) != points.n_rows)
    {
      Log::Fatal << "The model was trained on " << lr.Parameters().n_elem - 1
          << "-dimensional data, but the test points in '"
          << CLI::GetPrintableParam<mat>("test") << "' are " << points.n_rows
          << "-dimensional!" << endl;
    }

    // Perform the predictions using our model.
    rowvec predictions;
    Timer::Start("prediction");
    lr.Predict(points, predictions);
    Timer::Stop("prediction");

    // Save predictions.
    if (CLI::HasParam("output_predictions"))
      CLI::GetParam<rowvec>("output_predictions") = std::move(predictions);
  }

  // Save the model if needed.
  if (CLI::HasParam("output_model"))
    CLI::GetParam<LinearRegression>("output_model") = std::move(lr);
}
int main(int argc, char* argv[])
{
  // Handle parameters.
  CLI::ParseCommandLine(argc, argv);

  const string inputModelFile = CLI::GetParam<string>("input_model_file");
  const string outputModelFile = CLI::GetParam<string>("output_model_file");
  const string outputPredictionsFile =
      CLI::GetParam<string>("output_predictions");
  const string trainingResponsesFile =
      CLI::GetParam<string>("training_responses");
  const string testFile = CLI::GetParam<string>("test_file");
  const string trainFile = CLI::GetParam<string>("training_file");
  const double lambda = CLI::GetParam<double>("lambda");

  mat regressors;
  mat responses;

  LinearRegression lr;
  lr.Lambda() = lambda;

  bool computeModel = false;

  // We want to determine if an input file XOR model file were given.
  if (!CLI::HasParam("training_file"))
  {
    if (!CLI::HasParam("input_model_file"))
      Log::Fatal << "You must specify either --input_file or --model_file."
          << endl;
    else // The model file was specified, no problems.
      computeModel = false;
  }
  // The user specified an input file but no model file, no problems.
  else if (!CLI::HasParam("input_model_file"))
    computeModel = true;
  // The user specified both an input file and model file.
  // This is ambiguous -- which model should we use? A generated one or given
  // one?  Report error and exit.
  else
  {
    Log::Fatal << "You must specify either --input_file or --model_file, not "
        << "both." << endl;
  }

  if (CLI::HasParam("test_file") && !CLI::HasParam("output_predictions"))
    Log::Warn << "--test_file (-t) specified, but --output_predictions "
        << "(-o) is not; no results will be saved." << endl;

  // If they specified a model file, we also need a test file or we
  // have nothing to do.
  if (!computeModel && !CLI::HasParam("test_file"))
  {
    Log::Fatal << "When specifying --model_file, you must also specify "
        << "--test_file." << endl;
  }

  if (!computeModel && CLI::HasParam("lambda"))
  {
    Log::Warn << "--lambda ignored because no model is being trained." << endl;
  }

  // An input file was given and we need to generate the model.
  if (computeModel)
  {
    Timer::Start("load_regressors");
    data::Load(trainFile, regressors, true);
    Timer::Stop("load_regressors");

    // Are the responses in a separate file?
    if (CLI::HasParam("training_responses"))
    {
      // The initial predictors for y, Nx1.
      responses = trans(regressors.row(regressors.n_rows - 1));
      regressors.shed_row(regressors.n_rows - 1);
    }
    else
    {
      // The initial predictors for y, Nx1.
      Timer::Start("load_responses");
      data::Load(trainingResponsesFile, responses, true);
      Timer::Stop("load_responses");

      if (responses.n_rows == 1)
        responses = trans(responses); // Probably loaded backwards.

      if (responses.n_cols > 1)
        Log::Fatal << "The responses must have one column.\n";

      if (responses.n_rows != regressors.n_cols)
        Log::Fatal << "The responses must have the same number of rows as the "
            "training file.\n";
    }

    Timer::Start("regression");
    lr = LinearRegression(regressors, responses.unsafe_col(0));
    Timer::Stop("regression");

    // Save the parameters.
    if (CLI::HasParam("output_model_file"))
      data::Save(outputModelFile, "linearRegressionModel", lr);
  }

  // Did we want to predict, too?
  if (CLI::HasParam("test_file"))
  {
    // A model file was passed in, so load it.
    if (!computeModel)
    {
      Timer::Start("load_model");
      data::Load(inputModelFile, "linearRegressionModel", lr, true);
      Timer::Stop("load_model");
    }

    // Load the test file data.
    arma::mat points;
    Timer::Start("load_test_points");
    data::Load(testFile, points, true);
    Timer::Stop("load_test_points");

    // Ensure that test file data has the right number of features.
    if ((lr.Parameters().n_elem - 1) != points.n_rows)
    {
      Log::Fatal << "The model was trained on " << lr.Parameters().n_elem - 1
          << "-dimensional data, but the test points in '" << testFile
          << "' are " << points.n_rows << "-dimensional!" << endl;
    }

    // Perform the predictions using our model.
    arma::vec predictions;
    Timer::Start("prediction");
    lr.Predict(points, predictions);
    Timer::Stop("prediction");

    // Save predictions.
    if (CLI::HasParam("output_predictions"))
      data::Save(outputPredictionsFile, predictions, true, false);
  }
}
// A LinearRegression that has never been given data must refuse to report a
// slope or a y-intercept: both getters are expected to throw std::logic_error.
TEUCHOS_UNIT_TEST( Rythmos_ConvergenceTestHelpers, create ) {
  LinearRegression<double> emptyRegression;
  TEST_THROW(emptyRegression.getSlope(), std::logic_error);
  TEST_THROW(emptyRegression.getYIntercept(), std::logic_error);
}
Beispiel #20
0
// Trains a linear regression model from the data set named on the command
// line and saves the resulting pipeline.
//
// Reads "filename" (required), "model-filename", "learning-rate",
// "min-change", "max-epoch", "batch-size" and, optionally, "num-inputs" /
// "num-targets" from the parser. For a ".csv" data set both dimensions must
// be supplied and non-zero.
//
// Returns false if the filename is missing, the CSV dimensions are missing,
// the data cannot be loaded, or training fails. A failure to save the model
// only logs a warning and the function still returns true.
//
// NOTE(review): learning-rate and batch-size are parsed and logged but are
// not passed to the regression object in this function -- confirm whether
// that is intended.
bool train( CommandLineParser &parser ){

    infoLog << "Training regression model..." << endl;

    string trainDatasetFilename = "";
    string modelFilename = "";
    float learningRate = 0;
    float minChange = 0;
    unsigned int maxEpoch = 0;
    unsigned int batchSize = 0;

    //The data set filename is mandatory
    const bool haveFilename = parser.get("filename",trainDatasetFilename);
    if( !haveFilename ){
        errorLog << "Failed to parse filename from command line! You can set the filename using the -f." << endl;
        printHelp();
        return false;
    }

    //The remaining settings are read without checking the result
    parser.get("model-filename",modelFilename);
    parser.get( "learning-rate", learningRate );
    parser.get( "min-change", minChange );
    parser.get( "max-epoch", maxEpoch );
    parser.get( "batch-size", batchSize );

    infoLog << "settings: learning-rate: " << learningRate << " min-change: " << minChange << " max-epoch: " << maxEpoch << " batch-size: " << batchSize << endl;

    //Container for the training data
    RegressionData trainingData;

    //Apply the input/target dimensions only when both were given
    unsigned int numInputDimensions = 0;
    unsigned int numTargetDimensions = 0;
    if( parser.get("num-inputs",numInputDimensions) && parser.get("num-targets",numTargetDimensions) ){
      infoLog << "num input dimensions: " << numInputDimensions << " num target dimensions: " << numTargetDimensions << endl;
      trainingData.setInputAndTargetDimensions( numInputDimensions, numTargetDimensions );
    }

    //A CSV data set needs explicit, non-zero dimensions
    const bool dimensionsMissing = ( numInputDimensions == 0 || numTargetDimensions == 0 );
    if( dimensionsMissing && Util::stringEndsWith( trainDatasetFilename, ".csv" ) ){
      errorLog << "Failed to parse num input dimensions and num target dimensions from input arguments. You must supply the input and target dimensions if the data format is CSV!" << endl;
      printHelp();
      return false;
    }

    infoLog << "- Loading Training Data..." << endl;
    const bool loaded = trainingData.load( trainDatasetFilename );
    if( !loaded ){
        errorLog << "Failed to load training data!\n";
        return false;
    }

    const unsigned int N = trainingData.getNumInputDimensions();
    const unsigned int T = trainingData.getNumTargetDimensions();
    infoLog << "- Num training samples: " << trainingData.getNumSamples() << endl;
    infoLog << "- Num input dimensions: " << N << endl;
    infoLog << "- Num target dimensions: " << T << endl;

    //Configure the regression algorithm
    LinearRegression regression;
    regression.setMaxNumEpochs( maxEpoch );
    regression.setMinChange( minChange );
    regression.setUseValidationSet( true );
    regression.setValidationSetSize( 20 );
    regression.setRandomiseTrainingOrder( true );
    regression.enableScaling( true );

    //The pipeline holds the regression algorithm, wrapped in a multidimensional regression instance
    GestureRecognitionPipeline pipeline;
    pipeline.setRegressifier( MultidimensionalRegression( regression, true ) );

    infoLog << "- Training model...\n";

    const bool trained = pipeline.train( trainingData );
    if( !trained ){
        errorLog << "Failed to train model!" << endl;
        return false;
    }

    infoLog << "- Model trained!" << endl;

    infoLog << "- Saving model to: " << modelFilename << endl;

    //A failed save is reported but does not fail the function
    if( !pipeline.save( modelFilename ) ){
        warningLog << "Failed to save model to file: " << modelFilename << endl;
    }else{
        infoLog << "- Model saved." << endl;
    }

    infoLog << "- TrainingTime: " << pipeline.getTrainingTime() << endl;

    return true;
}