// Randomized self-test for twoWaypointCubicSpline on a three-segment spline:
// draws random boundary/waypoint values, builds the spline, then checks the
// segment times, the boundary and waypoint values, and continuity of the
// value and its first two derivatives across the interior knots.
// Prints "test passed" and returns 0 when every check has been executed.
int main(int argc, char **argv) {
    int segment_count = 3;
    const int last_segment = segment_count - 1;

    vector<double> segment_times = generateSegmentTimes(segment_count);

    // NOTE: the order of these draws is significant; each call advances the
    // shared random generator.
    double start_pos = uniform(generator);
    double start_vel = uniform(generator);

    double end_pos = uniform(generator);
    double end_vel = uniform(generator);

    double waypoint1 = uniform(generator);
    double waypoint2 = uniform(generator);

    PiecewisePolynomial<double> result = twoWaypointCubicSpline(
        segment_times, start_pos, start_vel, end_pos, end_vel, waypoint1, waypoint2);

    // The spline's segment boundaries must reproduce the requested times.
    for (int seg = 0; seg < segment_count; ++seg) {
        valuecheck(segment_times[seg], result.getStartTime(seg));
    }
    valuecheck(segment_times[segment_count], result.getEndTime(last_segment));

    // Value constraints at both ends and at the two interior waypoints.
    double tol = 1e-10;
    PiecewisePolynomial<double> derivative = result.derivative();
    PiecewisePolynomial<double> second_derivative = derivative.derivative();

    valuecheck(result.value(result.getStartTime(0)), start_pos, tol);
    valuecheck(derivative.value(result.getStartTime(0)), start_vel, tol);

    valuecheck(result.value(result.getEndTime(last_segment)), end_pos, tol);
    valuecheck(derivative.value(result.getEndTime(last_segment)), end_vel, tol);

    valuecheck(result.value(result.getStartTime(1)), waypoint1, tol);
    valuecheck(result.value(result.getStartTime(2)), waypoint2, tol);

    // Continuity constraints: sample slightly before and slightly after each
    // interior knot and compare value, velocity and acceleration.
    double eps = 1e-10;
    const int interior_knots = segment_count - 1;
    for (int knot = 0; knot < interior_knots; ++knot) {
        double t_knot = result.getEndTime(knot);
        const double t_before = t_knot - eps;
        const double t_after = t_knot + eps;
        valuecheck(result.value(t_before), result.value(t_after), 1e-8);
        valuecheck(derivative.value(t_before), derivative.value(t_after), 1e-8);
        valuecheck(second_derivative.value(t_before), second_derivative.value(t_after), 1e-8);
    }

#if !defined(WIN32) && !defined(WIN64)
    // Average wall time per call of randomSpeedTest (skipped on Windows).
    int ntests = 1000;
    cout << "time: "
         << measure<chrono::microseconds>::execution(randomSpeedTest, ntests) / static_cast<double>(ntests)
         << " microseconds." << endl;
#endif

    cout << "test passed" << endl;

    return 0;
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    string usage = "[coefs, objval] = nWaypointCubicSplinemex(ts, xs, xd0, xdf)";
    if (nrhs != 4)
        mexErrMsgIdAndTxt("Drake:nWaypointCubicSplinemex:WrongNumberOfInputs",
                          usage.c_str());
    if (nlhs > 2)
        mexErrMsgIdAndTxt("Drake:nWaypointCubicSplinemex:WrongNumberOfOutputs",
                          usage.c_str());

    const std::vector<double> segment_times = matlabToStdVector<double>(prhs[0]);
    MatrixXd xs = matlabToEigen<Dynamic, Dynamic>(prhs[1]);
    auto xd0 = matlabToEigen<Dynamic, 1>(prhs[2]);
    auto xdf = matlabToEigen<Dynamic, 1>(prhs[3]);

    mwSize ndof = static_cast<mwSize>(xs.rows());
    mwSize num_segments = static_cast<mwSize>(xs.cols()) - 1;
    mwSize num_knots = num_segments - 1;
    mwSize num_coeffs_per_segment = 4;
    mwSize dims[] = {ndof, num_segments, num_coeffs_per_segment};
    plhs[0] = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxREAL);
    double objective_value = 0.0;
    for (mwSize dof = 0; dof < ndof; dof++) {
        VectorXd xi = xs.block(dof, 1, 1, num_knots).transpose();

        PiecewisePolynomial<double> spline =
            nWaypointCubicSpline(segment_times, xs(dof, 0), xd0[dof],
                                 xs(dof, num_segments), xdf[dof], xi);

        PiecewisePolynomial<double> acceleration_squared = spline.derivative(2);
        acceleration_squared *= acceleration_squared;
        PiecewisePolynomial<double> acceleration_squared_integral =
            acceleration_squared.integral();
        objective_value +=
            acceleration_squared_integral.scalarValue(spline.getEndTime()) -
            acceleration_squared_integral.scalarValue(spline.getStartTime());

        for (mwSize segment_index = 0; segment_index < spline.getNumberOfSegments();
                segment_index++) {
            for (mwSize coefficient_index = 0;
                    coefficient_index < num_coeffs_per_segment; coefficient_index++) {
                mwSize sub[] = {dof, segment_index,
                                num_coeffs_per_segment - coefficient_index -
                                1
                               };  // Matlab's reverse coefficient indexing...
                *(mxGetPr(plhs[0]) + sub2ind(3, dims, sub)) =
                    spline.getPolynomial(static_cast<int>(segment_index))
                    .getCoefficients()[coefficient_index];
            }
        }
    }

    if (nlhs > 1) {
        plhs[1] = mxCreateDoubleScalar(objective_value);
    }
}
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
  string usage =
      "[coefs, ts, objective_value] = "
      "nWaypointCubicSplineFreeKnotTimesmex.cpp(t0, tf, xs, xd0, xdf)";
  if (nrhs != 5)
    mexErrMsgIdAndTxt(
        "Drake:nWaypointCubicSplineFreeKnotTimesmex.cpp:WrongNumberOfInputs",
        usage.c_str());
  if (nlhs < 2 || nlhs > 3)
    mexErrMsgIdAndTxt(
        "Drake:nWaypointCubicSplineFreeKnotTimesmex.cpp:WrongNumberOfOutputs",
        usage.c_str());

  double t0 = mxGetPrSafe(prhs[0])[0];
  double tf = mxGetPrSafe(prhs[1])[0];
  MatrixXd xs = matlabToEigen<Dynamic, Dynamic>(prhs[2]);
  auto xd0 = matlabToEigen<Dynamic, 1>(prhs[3]);
  auto xdf = matlabToEigen<Dynamic, 1>(prhs[4]);

  mwSize ndof = static_cast<mwSize>(xs.rows());
  mwSize num_segments = static_cast<mwSize>(xs.cols()) - 1;
  mwSize num_knots = num_segments - 1;
  if (num_knots >= 3)
    mexWarnMsgTxt(
        "More knots than two is likely to be super slow in a grid search!\n");
  if (num_knots <= 0)
    mexErrMsgIdAndTxt(
        "Drake:nWaypointCubicSplineFreeKnotTimesmex.cpp:"
        "NotEnoughKnotsToJustifyThisFunction",
        usage.c_str());
  mwSize num_coeffs_per_segment = 4;
  mwSize dims[] = {ndof, num_segments, num_coeffs_per_segment};
  plhs[0] = mxCreateNumericArray(3, dims, mxDOUBLE_CLASS, mxREAL);

  std::vector<double> segment_times;
  segment_times.resize(static_cast<size_t>(num_segments) + 1);
  segment_times[0] = t0;
  segment_times[static_cast<size_t>(num_segments)] = tf;
  std::vector<double> best_segment_times = segment_times;
  double t_step = (tf - t0) / GRID_STEPS;
  double min_objective_value = numeric_limits<double>::infinity();

  // assemble the knot point locations for input to nWaypointCubicSpline
  MatrixXd xi = xs.block(0, 1, ndof, num_knots);

  if (GRID_STEPS <= num_knots) {
    // If we have have too few grid steps, then by pigeonhole it's
    // impossible to give each a unique time in our grid search.
    mexErrMsgIdAndTxt(
        "Drake:nWaypointCubicSplineFreeKnotTimesmex.cpp:"
        "TooManyKnotsForNumGridSteps",
        usage.c_str());
  }
  std::vector<int> t_indices;
  t_indices.reserve(num_knots);
  for (mwSize i = 0; i < num_knots; i++) {
    t_indices.push_back(i + 1);  // assume knot point won't be the same time as
                                 // the initial state, or previous knot point
  }

  while (t_indices[0] < (GRID_STEPS - static_cast<int>(num_knots) + 1)) {
    for (mwSize i = 0; i < num_knots; i++)
      segment_times[i + 1] = t0 + t_indices[i] * t_step;

    bool valid_solution = true;
    double objective_value = 0.0;
    for (mwSize dof = 0; dof < ndof && valid_solution; dof++) {
      try {
        PiecewisePolynomial<double> spline = nWaypointCubicSpline(
            segment_times, xs(dof, 0), xd0[dof], xs(dof, num_segments),
            xdf[dof], xi.row(dof).transpose());
        PiecewisePolynomial<double> acceleration_squared = spline.derivative(2);
        acceleration_squared *= acceleration_squared;
        PiecewisePolynomial<double> acceleration_squared_integral =
            acceleration_squared.integral();
        objective_value +=
            acceleration_squared_integral.scalarValue(spline.getEndTime()) -
            acceleration_squared_integral.scalarValue(spline.getStartTime());
      } catch (ConstraintMatrixSingularError &) {
        valid_solution = false;
      }
    }

    if (valid_solution && objective_value < min_objective_value) {
      best_segment_times = segment_times;
      min_objective_value = objective_value;
    }

    // Advance grid search counter or terminate, counting from
    // the latest t_index, and on overflow carrying to the
    // next lowest t_index and resetting to the new value of that
    // next lowest t_index. (since times must always be in order!)
    t_indices[num_knots - 1]++;
    // carry, except for the lowest place, which we
    // use to detect doneness.
    for (size_t i = num_knots - 1; i > 0; i--) {
      if ((i == num_knots - 1 && t_indices[i] >= GRID_STEPS) ||
          (i < num_knots - 1 && t_indices[i] >= t_indices[i + 1])) {
        t_indices[i - 1]++;
        t_indices[i] = t_indices[i - 1] + 1;
      }
    }
  }

  for (mwSize dof = 0; dof < ndof; dof++) {
    PiecewisePolynomial<double> spline = nWaypointCubicSpline(
        best_segment_times, xs(dof, 0), xd0[dof], xs(dof, num_segments),
        xdf[dof], xi.row(dof).transpose());
    for (mwSize segment_index = 0;
         segment_index < static_cast<mwSize>(spline.getNumberOfSegments());
         segment_index++) {
      for (mwSize coefficient_index = 0;
           coefficient_index < num_coeffs_per_segment; coefficient_index++) {
        mwSize sub[] = {dof, segment_index,
                        num_coeffs_per_segment - coefficient_index -
                            1};  // Matlab's reverse coefficient indexing...
        *(mxGetPr(plhs[0]) + sub2ind(3, dims, sub)) =
            spline.getPolynomial(static_cast<int>(segment_index))
                .GetCoefficients()[coefficient_index];
      }
    }
  }
  plhs[1] = stdVectorToMatlab(best_segment_times);

  if (nlhs > 2) plhs[2] = mxCreateDoubleScalar(min_objective_value);
}
void mexFunction(int nlhs, mxArray *plhs[],int nrhs, const mxArray *prhs[]) {
  string usage = "[coefs, ts, objective_value] = twoWaypointCubicSplineFreeKnotTimesmex.cpp(t0, tf, xs, xd0, xdf)";
  if (nrhs != 5)
    mexErrMsgIdAndTxt("Drake:twoWaypointCubicSplineFreeKnotTimesmex.cpp:WrongNumberOfInputs", usage.c_str());
  if (nlhs < 2 || nlhs > 3)
    mexErrMsgIdAndTxt("Drake:twoWaypointCubicSplineFreeKnotTimesmex.cpp:WrongNumberOfOutputs", usage.c_str());

  double t0 = mxGetPrSafe(prhs[0])[0];
  double tf = mxGetPrSafe(prhs[1])[0];
  MatrixXd xs = matlabToEigen<Dynamic, Dynamic>(prhs[2]);
  auto xd0 = matlabToEigen<Dynamic, 1>(prhs[3]);
  auto xdf = matlabToEigen<Dynamic, 1>(prhs[4]);

  mwSize ndof = static_cast<mwSize>(xs.rows());
  mwSize num_segments = 3;
  mwSize num_coeffs_per_segment = 4;
  mwSize dims[] = {ndof, num_segments, num_coeffs_per_segment};
  plhs[0] = mxCreateNumericArray(num_segments, dims, mxDOUBLE_CLASS, mxREAL);

  std::vector<double> segment_times;
  segment_times.resize(static_cast<size_t>(num_segments) + 1);
  segment_times[0] = t0;
  segment_times[static_cast<size_t>(num_segments)] = tf;
  std::vector<double> best_segment_times = segment_times;
  double t_step = (tf - t0) / GRID_STEPS;
  double min_objective_value = numeric_limits<double>::infinity();

  for (int t1_index = 0; t1_index < GRID_STEPS; t1_index++) {
    segment_times[1] = t0 + t1_index * t_step;
    for (int t2_index = t1_index; t2_index < GRID_STEPS; t2_index++) {
      segment_times[2] = t0 + t2_index * t_step;

      bool valid_solution = true;
      double objective_value = 0.0;
      for (int dof = 0; dof < ndof && valid_solution; dof++) {
        try {
          PiecewisePolynomial<double> spline = twoWaypointCubicSpline(segment_times, xs(dof, 0), xd0[dof], xs(dof, 3), xdf[dof], xs(dof, 1), xs(dof, 2));
          PiecewisePolynomial<double> acceleration_squared = spline.derivative(2);
          acceleration_squared *= acceleration_squared;
          PiecewisePolynomial<double> acceleration_squared_integral = acceleration_squared.integral();
          objective_value += acceleration_squared_integral.value(spline.getEndTime()) - acceleration_squared_integral.value(spline.getStartTime());
        }
        catch (ConstraintMatrixSingularError&) {
          valid_solution = false;
        }
      }

      if (valid_solution && objective_value < min_objective_value) {
        best_segment_times[1] = segment_times[1];
        best_segment_times[2] = segment_times[2];
        min_objective_value = objective_value;
      }
    }
  }

  for (mwSize dof = 0; dof < ndof; dof++) {
    PiecewisePolynomial<double> spline = twoWaypointCubicSpline(best_segment_times, xs(dof, 0), xd0[dof], xs(dof, 3), xdf[dof], xs(dof, 1), xs(dof, 2));
    for (mwSize segment_index = 0; segment_index < spline.getNumberOfSegments(); segment_index++) {
      for (mwSize coefficient_index = 0; coefficient_index < num_coeffs_per_segment; coefficient_index++) {
        mwSize sub[] = {dof, segment_index, num_coeffs_per_segment - coefficient_index - 1}; // Matlab's reverse coefficient indexing...
        *(mxGetPr(plhs[0]) + sub2ind(3, dims, sub)) = spline.getPolynomial(segment_index).getCoefficients()[coefficient_index];
      }
    }
  }
  plhs[1] = stdVectorToMatlab(best_segment_times);

  if (nlhs > 2)
    plhs[2] = mxCreateDoubleScalar(min_objective_value);
}