int main() {
    printf("\n");
    printf("\n");
    printf("\n");
    printf(
        " HPMPC -- Library for High-Performance implementation of solvers for "
        "MPC.\n");
    printf(
        " Copyright (C) 2014-2015 by Technical University of Denmark. All "
        "rights reserved.\n");
    printf("\n");
    printf(" HPMPC is distributed in the hope that it will be useful,\n");
    printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
    printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
    printf(" See the GNU Lesser General Public License for more details.\n");
    printf("\n");
    printf("\n");
    printf("\n");

#if defined(TARGET_X64_INTEL_HASWELL) ||      \
    defined(TARGET_X64_INTEL_SABDY_BRIDGE) || \
    defined(TARGET_X64_INTEL_CORE) || defined(TARGET_X86_AMD_BULLDOZER)
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);  // flush to zero subnormals !!!
                                                 // works only with one thread
                                                 // !!!
#endif

    int ii, jj;

    int rep, nrep = NREP;

    int nx = 8;   // number of states (it has to be even for the mass-spring
                  // system test problem)
    int nu = 3;   // number of inputs (controllers) (it has to be at least 1 and
                  // at most nx/2 for the mass-spring system test problem)
    int N = 15;   // horizon length
    int nb = 11;  // number of box constrained inputs and states
    int ng = 0;   // 4;  // number of general constraints
    int ngN = 4;  // 4;  // number of general constraints at the last stage

    //    int N2 = 3;   // horizon length of partially condensed problem

    int nbu = nu < nb ? nu : nb;
    int nbx = nb - nu > 0 ? nb - nu : 0;

    // stage-wise variant size
    int nxx[N + 1];
#if defined(ELIMINATE_X0)
    nxx[0] = 0;
#else
    nxx[0] = nx;
#endif
    for (ii = 1; ii <= N; ii++) nxx[ii] = nx;

    int nuu[N + 1];
    for (ii = 0; ii < N; ii++) nuu[ii] = nu;
    nuu[N] = 0;

    int nbb[N + 1];
#if defined(ELIMINATE_X0)
    nbb[0] = nbu;
#else
    nbb[0] = nb;
#endif
    for (ii = 1; ii < N; ii++) nbb[ii] = nb;
    nbb[N] = nbx;

    int ngg[N + 1];
    for (ii = 0; ii < N; ii++) ngg[ii] = ng;
    ngg[N] = ngN;

    printf(
        " Test problem: mass-spring system with %d masses and %d controls.\n",
        nx / 2, nu);
    printf("\n");
    printf(
        " MPC problem size: %d states, %d inputs, %d horizon length, %d "
        "two-sided box constraints, %d two-sided general constraints.\n",
        nx, nu, N, nb, ng);
    printf("\n");
    printf(
        " IP method parameters: predictor-corrector IP, double precision, %d "
        "maximum iterations, %5.1e exit tolerance in duality measure.\n",
        MAXITER, TOL);
    printf("\n");
#if defined(TARGET_X64_AVX2)
    printf(" HPMPC built for the AVX2 architecture\n");
#endif
#if defined(TARGET_X64_AVX)
    printf(" HPMPC built for the AVX architecture\n");
#endif
    printf("\n");

    /************************************************
     * dynamical system
     ************************************************/

    // state space matrices & initial state
    double *A;
    d_zeros(&A, nx, nx);  // states update matrix
    double *B;
    d_zeros(&B, nx, nu);  // inputs matrix
    double *b;
    d_zeros(&b, nx, 1);  // states offset
    double *x0;
    d_zeros(&x0, nx, 1);  // initial state

    // mass-spring system
    double Ts = 0.5;  // sampling time
    mass_spring_system(Ts, nx, nu, A, B, b, x0);

    for (jj = 0; jj < nx; jj++) b[jj] = 0.1;

    for (jj = 0; jj < nx; jj++) x0[jj] = 0;
    x0[0] = 2.5;
    x0[1] = 2.5;

//    d_print_mat(nx, nx, A, nx);
//    d_print_mat(nx, nu, B, nx);
//    d_print_mat(nx, 1, b, nx);
//    d_print_mat(nx, 1, x0, nx);

#if defined(ELIMINATE_X0)
    // compute b0 = b + A*x0
    double *b0;
    d_zeros(&b0, nx, 1);
    dcopy_3l(nx, b, 1, b0, 1);
    dgemv_n_3l(nx, nx, A, nx, x0, b0);
    //    d_print_mat(nx, 1, b, nx);
    //    d_print_mat(nx, 1, b0, nx);

    // then A0 is a matrix of size 0x0
    double *A0;
    d_zeros(&A0, 0, 0);
#endif

    /************************************************
     * box constraints
     ************************************************/

    int jj_end;

    int *idxb0;
    int_zeros(&idxb0, nbb[0], 1);
    double *lb0;
    d_zeros(&lb0, nbb[0], 1);
    double *ub0;
    d_zeros(&ub0, nbb[0], 1);
#if defined(ELIMINATE_X0)
    for (jj = 0; jj < nbb[0]; jj++) {
        lb0[jj] = -0.5;  // umin
        ub0[jj] = +0.5;  // umin
        idxb0[jj] = jj;
    }
#else
    jj_end = nbx < nbb[0] ? nbx : nbb[0];
    for (jj = 0; jj < jj_end; jj++) {
//        lb0[jj] = x0[jj - nbu];  // initial state
//        ub0[jj] = x0[jj - nbu];  // initial state
        lb0[jj] = x0[jj];  // initial state
        ub0[jj] = x0[jj];  // initial state
        idxb0[jj] = jj;
    }
    for (; jj < nbb[0]; jj++) {
        lb0[jj] = -0.5;  // umin
        ub0[jj] = +0.5;  // umax
        idxb0[jj] = jj;
    }
#endif
    //    int_print_mat(nbb[0], 1, idxb0, nbb[0]);
    //    d_print_mat(nbb[0], 1, lb0, nbb[0]);

    int *idxb1;
    int_zeros(&idxb1, nbb[1], 1);
    double *lb1;
    d_zeros(&lb1, nbb[1], 1);
    double *ub1;
    d_zeros(&ub1, nbb[1], 1);
    jj_end = nbx < nbb[1] ? nbx : nbb[1];
    for (jj = 0; jj < jj_end; jj++) {
        lb1[jj] = -4.0;  // xmin
        ub1[jj] = +4.0;  // xmax
        idxb1[jj] = jj;
    }
    for (; jj < nbb[1]; jj++) {
        lb1[jj] = -0.5;  // umin
        ub1[jj] = +0.5;  // umax
        idxb1[jj] = jj;
    }
    //    int_print_mat(nbb[1], 1, idxb1, nbb[1]);
    //    d_print_mat(nbb[1], 1, lb1, nbb[1]);

    int *idxbN;
    int_zeros(&idxbN, nbb[N], 1);
    double *lbN;
    d_zeros(&lbN, nbb[N], 1);
    double *ubN;
    d_zeros(&ubN, nbb[N], 1);
    jj_end = nbx < nbb[N] ? nbx : nbb[N];
    for (jj = 0; jj < jj_end; jj++) {
        lbN[jj] = -4.0;  // xmin
        ubN[jj] = +4.0;  // xmax
        idxbN[jj] = jj;
    }
    for (; jj < nbb[N]; jj++) {
        lbN[jj] = -0.5;  // umin
        ubN[jj] = +0.5;  // umax
        idxbN[jj] = jj;
    }
    //    int_print_mat(nbb[N], 1, idxbN, nbb[N]);
    //    d_print_mat(nbb[N], 1, lbN, nbb[N]);

    /************************************************
     * general constraints
     ************************************************/

    double *C;
    d_zeros(&C, ng, nx);
    double *D;
    d_zeros(&D, ng, nu);
    double *lg;
    d_zeros(&lg, ng, 1);
    double *ug;
    d_zeros(&ug, ng, 1);

    double *CN;
    d_zeros(&CN, ngN, nx);
    for (ii = 0; ii < ngN; ii++) CN[ii * (ngN + 1)] = 1.0;
    //    d_print_mat(ngN, nx, CN, ngN);
    double *lgN;
    d_zeros(&lgN, ngN, 1);  // force all states to 0 at the last stage
    double *ugN;
    d_zeros(&ugN, ngN, 1);  // force all states to 0 at the last stage

    /************************************************
     * cost function
     ************************************************/

    double *Q;
    d_zeros(&Q, nx, nx);
    for (ii = 0; ii < nx; ii++) Q[ii * (nx + 1)] = 1.0;

    double *R;
    d_zeros(&R, nu, nu);
    for (ii = 0; ii < nu; ii++) R[ii * (nu + 1)] = 2.0;

    double *S;
    d_zeros(&S, nu, nx);

    double *q;
    d_zeros(&q, nx, 1);
    for (ii = 0; ii < nx; ii++) q[ii] = 0.1;

    double *r;
    d_zeros(&r, nu, 1);
    for (ii = 0; ii < nu; ii++) r[ii] = 0.2;

#if defined(ELIMINATE_X0)
    // Q0 and q0 are matrices of size 0
    double *Q0;
    d_zeros(&Q0, 0, 0);
    double *q0;
    d_zeros(&q0, 0, 1);

    // compute r0 = r + S*x0
    double *r0;
    d_zeros(&r0, nu, 1);
    dcopy_3l(nu, r, 1, r0, 1);
    dgemv_n_3l(nu, nx, S, nu, x0, r0);

    // then S0 is a matrix of size nux0
    double *S0;
    d_zeros(&S0, nu, 0);
#endif

    /************************************************
     * problems data
     ************************************************/

    double *hA[N];
    double *hB[N];
    double *hb[N];
    double *hQ[N + 1];
    double *hS[N];
    double *hR[N];
    double *hq[N + 1];
    double *hr[N];
    double *hlb[N + 1];
    double *hub[N + 1];
    int *hidxb[N + 1];
    double *hC[N + 1];
    double *hD[N];
    double *hlg[N + 1];
    double *hug[N + 1];

#if defined(ELIMINATE_X0)
    hA[0] = A0;
    hb[0] = b0;
    hQ[0] = Q0;
    hS[0] = S0;
    hq[0] = q0;
    hr[0] = r0;
#else
    hA[0] = A;
    hb[0] = b;
    hQ[0] = Q;
    hS[0] = S;
    hq[0] = q;
    hr[0] = r;
#endif
    hB[0] = B;
    hR[0] = R;
    hlb[0] = lb0;
    hub[0] = ub0;
    hidxb[0] = idxb0;
    hC[0] = C;
    hD[0] = D;
    hlg[0] = lg;
    hug[0] = ug;
    for (ii = 1; ii < N; ii++) {
        hA[ii] = A;
        hB[ii] = B;
        hb[ii] = b;
        hQ[ii] = Q;
        hS[ii] = S;
        hR[ii] = R;
        hq[ii] = q;
        hr[ii] = r;
        hlb[ii] = lb1;
        hub[ii] = ub1;
        hidxb[ii] = idxb1;
        hC[ii] = C;
        hD[ii] = D;
        hlg[ii] = lg;
        hug[ii] = ug;
    }
    hQ[N] = Q;  // or maybe initialize to the solution of the DARE???
    hq[N] = q;  // or maybe initialize to the solution of the DARE???
    hlb[N] = lbN;
    hub[N] = ubN;
    hidxb[N] = idxbN;
    hC[N] = CN;
    hlg[N] = lgN;
    hug[N] = ugN;

    /************************************************
     * solution
     ************************************************/

    double *hx[N + 1];
    double *hu[N];
    double *hpi[N];
    double *hlam[N + 1];
    double *ht[N + 1];

    for (ii = 0; ii < N; ii++) {
        d_zeros(&hx[ii], nxx[ii], 1);
        d_zeros(&hu[ii], nuu[ii], 1);
        d_zeros(&hpi[ii], nxx[ii + 1], 1);
        d_zeros(&hlam[ii], 2 * nbb[ii] + 2 * ngg[ii], 1);
        d_zeros(&ht[ii], 2 * nbb[ii] + 2 * ngg[ii], 1);
    }
    d_zeros(&hx[N], nxx[N], 1);
    d_zeros(&hlam[N], 2 * nbb[N] + 2 * ngg[N], 1);
    d_zeros(&ht[N], 2 * nbb[N] + 2 * ngg[N], 1);

    /************************************************
     * create the in and out struct
     ************************************************/

    ocp_qp_in qp_in;
    qp_in.N = N;
    qp_in.nx = (const int *)nxx;
    qp_in.nu = (const int *)nuu;
    qp_in.nb = (const int *)nbb;
    qp_in.nc = (const int *)ngg;
    qp_in.A = (const double **)hA;
    qp_in.B = (const double **)hB;
    qp_in.b = (const double **)hb;
    qp_in.Q = (const double **)hQ;
    qp_in.S = (const double **)hS;
    qp_in.R = (const double **)hR;
    qp_in.q = (const double **)hq;
    qp_in.r = (const double **)hr;
    qp_in.idxb = (const int **)hidxb;
    qp_in.lb = (const double **)hlb;
    qp_in.ub = (const double **)hub;
    qp_in.Cx = (const double **)hC;
    qp_in.Cu = (const double **)hD;
    qp_in.lc = (const double **)hlg;
    qp_in.uc = (const double **)hug;

    ocp_qp_out qp_out;
    qp_out.x = hx;
    qp_out.u = hu;
    qp_out.pi = hpi;
    qp_out.lam = hlam;
    qp_out.t = ht;  // XXX why also the slack variables ???

    /************************************************
     * solver arguments (fully sparse)
     ************************************************/

    // solver arguments
    ocp_qp_condensing_hpipm_args *hpipm_args = ocp_qp_condensing_hpipm_create_arguments(&qp_in);
//    hpipm_args->mu_max = TOL;
//    hpipm_args->iter_max = MAXITER;
//    hpipm_args->alpha_min = MINSTEP;
    hpipm_args->mu0 = 1.0;  // 0.0

    /************************************************
     * work space (fully sparse)
     ************************************************/

    int work_space_size =
        ocp_qp_condensing_hpipm_calculate_workspace_size(&qp_in, hpipm_args);
    printf("\nwork space size: %d bytes\n", work_space_size);
    void *workspace = malloc(work_space_size);

    //    void *mem;
    //    ocp_qp_hpipm_create_memory(&qp_in, hpipm_args, &mem);
    int memory_size =
        ocp_qp_condensing_hpipm_calculate_memory_size(&qp_in, hpipm_args);
    printf("\nmemory: %d bytes\n", memory_size);
    void *memory = malloc(memory_size);

    ocp_qp_condensing_hpipm_memory *hpipm_memory =
        ocp_qp_condensing_hpipm_create_memory(&qp_in, hpipm_args);

    /************************************************
     * call the solver (fully sparse)
     ************************************************/

    int return_value;

    acados_timer timer;
    acados_tic(&timer);

    //  nrep = 1;
    for (rep = 0; rep < nrep; rep++) {
        // call the QP OCP solver
        //        return_value = ocp_qp_hpipm(&qp_in, &qp_out, hpipm_args,
        //        workspace);
        return_value =
            ocp_qp_condensing_hpipm(&qp_in, &qp_out, hpipm_args, hpipm_memory, workspace);
    }

    real_t time = acados_toc(&timer)/nrep;

    if (return_value == ACADOS_SUCCESS)
        printf("\nACADOS status: solution found in %d iterations\n",
               hpipm_memory->iter);

    if (return_value == ACADOS_MAXITER)
        printf("\nACADOS status: maximum number of iterations reached\n");

    if (return_value == ACADOS_MINSTEP)
        printf("\nACADOS status: below minimum step size length\n");

    printf("\nu = \n");
    for (ii = 0; ii < N; ii++) d_print_mat(1, nuu[ii], hu[ii], 1);

    printf("\nx = \n");
    for (ii = 0; ii <= N; ii++) d_print_mat(1, nxx[ii], hx[ii], 1);

    printf("\npi = \n");
    for (ii = 0; ii < N; ii++) d_print_mat(1, nxx[ii+1], hpi[ii], 1);

    printf("\nlam = \n");
    for (ii = 0; ii <= N; ii++) d_print_mat(1, 2*nbb[ii]+2*ngg[ii], hlam[ii], 1);

    printf("\n");
    printf(" inf norm res: %e, %e, %e, %e, %e\n", hpipm_memory->inf_norm_res[0],
           hpipm_memory->inf_norm_res[1], hpipm_memory->inf_norm_res[2],
           hpipm_memory->inf_norm_res[3], hpipm_memory->inf_norm_res[4]);
    printf("\n");
    printf(
        " Solution time for %d IPM iterations, averaged over %d runs: %5.2e "
        "seconds\n", hpipm_memory->iter, nrep, time);
    printf("\n\n");

    /************************************************
     * free memory
     ************************************************/

    d_free(A);
    d_free(B);
    d_free(b);
    d_free(x0);
    d_free(Q);
    d_free(S);
    d_free(R);
    d_free(q);
    d_free(r);
#if defined(ELIMINATE_X0)
    d_free(A0);
    d_free(b0);
    d_free(Q0);
    d_free(S0);
    d_free(q0);
    d_free(r0);
#endif
    int_free(idxb0);
    d_free(lb0);
    d_free(ub0);
    int_free(idxb1);
    d_free(lb1);
    d_free(ub1);
    int_free(idxbN);
    d_free(lbN);
    d_free(ubN);
    d_free(C);
    d_free(D);
    d_free(lg);
    d_free(ug);
    d_free(CN);
    d_free(lgN);
    d_free(ugN);

    for (ii = 0; ii < N; ii++) {
        d_free(hx[ii]);
        d_free(hu[ii]);
        d_free(hpi[ii]);
        d_free(hlam[ii]);
        d_free(ht[ii]);
    }
    d_free(hx[N]);
    d_free(hlam[N]);
    d_free(ht[N]);

    free(workspace);
    free(memory);

    return 0;
}
Example #2
0
int main() {

#if defined(TARGET_X64_INTEL_HASWELL) || defined(TARGET_X64_INTEL_SABDY_BRIDGE) ||  \
    defined(TARGET_X64_INTEL_CORE) || defined(TARGET_X86_AMD_BULLDOZER)
    _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);  // flush to zero subnormals !!!
                                                 // works only with one thread
                                                 // !!!
#endif

    int ii, jj;

    int rep, nrep = NREP;

    int nx = 8;  // number of states (it has to be even for the mass-spring
                  // system test problem)
    int nu = 3;  // number of inputs (controllers) (it has to be at least 1 and
                  // at most nx/2 for the mass-spring system test problem)
    int N = 15;   // horizon length
    int nb = 11;  // number of box constrained inputs and states
    int ng = 0;  // 4;  // number of general constraints
    int ngN = 4;  // 4;  // number of general constraints at the last stage

    int nbu = nu < nb ? nu : nb;
    int nbx = nb - nu > 0 ? nb - nu : 0;

    // stage-wise variant size
    int nxx[N + 1];
#if defined(ELIMINATE_X0)
    nxx[0] = 0;
#else
    nxx[0] = nx;
#endif
    for (ii = 1; ii <= N; ii++) nxx[ii] = nx;

    int nuu[N + 1];
    for (ii = 0; ii < N; ii++) nuu[ii] = nu;
    nuu[N] = 0;

    int nbb[N + 1];
#if defined(ELIMINATE_X0)
    nbb[0] = nbu;
#else
    nbb[0] = nb;
#endif
    for (ii = 1; ii < N; ii++) nbb[ii] = nb;
    nbb[N] = nbx;

    int ngg[N + 1];
    for (ii = 0; ii < N; ii++) ngg[ii] = ng;
    ngg[N] = ngN;

    printf(
        " Test problem: mass-spring system with %d masses and %d controls.\n",
        nx / 2, nu);
    printf("\n");
    printf(
        " MPC problem size: %d states, %d inputs, %d horizon length, %d "
        "two-sided box constraints, %d two-sided general constraints.\n",
        nx, nu, N, nb, ng);
    printf("\n");
    printf("qpDUNES\n");
    printf("\n");

    /************************************************
    * dynamical system
    ************************************************/

    // state space matrices & initial state
    double *A;
    d_zeros(&A, nx, nx);  // states update matrix
    double *B;
    d_zeros(&B, nx, nu);  // inputs matrix
    double *b;
    d_zeros(&b, nx, 1);  // states offset
    double *x0;
    d_zeros(&x0, nx, 1);  // initial state

    // mass-spring system
    double Ts = 0.5;  // sampling time
    mass_spring_system(Ts, nx, nu, A, B, b, x0);

    for (jj = 0; jj < nx; jj++) b[jj] = 0.1;

    for (jj = 0; jj < nx; jj++) x0[jj] = 0;
    x0[0] = 2.5;
    x0[1] = 2.5;

    //    d_print_mat(nx, nx, A, nx);
    //    d_print_mat(nx, nu, B, nx);
    //    d_print_mat(nx, 1, b, nx);
    //    d_print_mat(nx, 1, x0, nx);

#if defined(ELIMINATE_X0)
    // compute b0 = b + A*x0
    double *b0;
    d_zeros(&b0, nx, 1);
    dcopy_3l(nx, b, 1, b0, 1);
    dgemv_n_3l(nx, nx, A, nx, x0, b0);
    //    d_print_mat(nx, 1, b, nx);
    //    d_print_mat(nx, 1, b0, nx);

    // then A0 is a matrix of size 0x0
    double *A0;
    d_zeros(&A0, 0, 0);
#endif

    /************************************************
    * box constraints
    ************************************************/

#if defined (FLIP_BOUNDS)
    int jj_end;
#endif

    int *idxb0;
    int_zeros(&idxb0, nbb[0], 1);
    double *lb0;
    d_zeros(&lb0, nbb[0], 1);
    double *ub0;
    d_zeros(&ub0, nbb[0], 1);
#if defined(ELIMINATE_X0)
    for (jj = 0; jj < nbb[0]; jj++) {
        lb0[jj] = - 0.5;  // umin
        ub0[jj] = + 0.5;  // umin
        idxb0[jj] = jj;
    }
#else
#if defined (FLIP_BOUNDS)
jj_end = nbu < nbb[0] ? nbu : nbb[0];
for (jj = 0; jj < jj_end; jj++) {
    lb0[jj] = - 0.5;  // umin
    ub0[jj] = + 0.5;  // umax
    idxb0[jj] = jj;
}
for ( ; jj < nbb[0]; jj++) {
    lb0[jj] = x0[jj-nbu];  // initial state
    ub0[jj] = x0[jj-nbu];  // initial state
    idxb0[jj] = jj;
}
#else
for (jj = 0; jj < nxx[0]; jj++) {
    lb0[jj] = x0[jj];  // initial state
    ub0[jj] = x0[jj];  // initial state
    idxb0[jj] = jj;
}
for (jj = 0; jj < nbu; jj++) {
    lb0[jj+nxx[0]] = -0.5;  // umin
    ub0[jj+nxx[0]] = 0.5;   // umax
    idxb0[jj+nxx[0]] = nxx[0]+jj;
}
#endif
#endif
    //    int_print_mat(nbb[0], 1, idxb0, nbb[0]);
    //    d_print_mat(nbb[0], 1, lb0, nbb[0]);

    int *idxb1;
    int_zeros(&idxb1, nbb[1], 1);
    double *lb1;
    d_zeros(&lb1, nbb[1], 1);
    double *ub1;
    d_zeros(&ub1, nbb[1], 1);
#if defined (FLIP_BOUNDS)
    jj_end = nbu < nbb[1] ? nbu : nbb[1];
    for (jj = 0; jj < jj_end; jj++) {
        lb1[jj] = - 0.5;  // umin
        ub1[jj] = + 0.5;  // umax
        idxb1[jj] = jj;
    }
    for ( ; jj < nbb[1]; jj++) {
        lb1[jj] = - 4.0;  // xmin
        ub1[jj] = + 4.0;  // xmax
        idxb1[jj] = jj;
    }
#else
    for (jj = 0; jj < nbx; jj++) {
        lb1[jj] = -4.0;  // xmin
        ub1[jj] = 4.0;   // xmax
        idxb1[jj] = jj;
    }
    for (; jj < nb; jj++) {
        lb1[jj] = -0.5;  // umin
        ub1[jj] = 0.5;   // umax
        idxb1[jj] = jj;
    }
#endif
    //    int_print_mat(nbb[1], 1, idxb1, nbb[1]);
    //    d_print_mat(nbb[1], 1, lb1, nbb[1]);

    int *idxbN;
    int_zeros(&idxbN, nbb[N], 1);
    double *lbN;
    d_zeros(&lbN, nbb[N], 1);
    double *ubN;
    d_zeros(&ubN, nbb[N], 1);
#if defined (FLIP_BOUNDS)
    jj_end = nbu < nbb[N] ? nbu : nbb[N];
    for (jj = 0; jj < jj_end; jj++) {
        lbN[jj] = - 0.5;  // umin
        ubN[jj] = + 0.5;  // umax
        idxbN[jj] = jj;
    }
    for ( ; jj < nbb[N]; jj++) {
        lbN[jj] = - 4.0;  // xmin
        ubN[jj] = + 4.0;  // xmax
        idxbN[jj] = jj;
    }
#else
    for (jj = 0; jj < nbx; jj++) {
        lbN[jj] = -4.0;  // xmin
        ubN[jj] = 4.0;   // xmax
        idxbN[jj] = jj;
    }
#endif
    //    int_print_mat(nbb[N], 1, idxbN, nbb[N]);
    //    d_print_mat(nbb[N], 1, lbN, nbb[N]);

    /************************************************
    * general constraints
    ************************************************/

    double *C;
    d_zeros(&C, ng, nx);
    double *D;
    d_zeros(&D, ng, nu);
    double *lg;
    d_zeros(&lg, ng, 1);
    double *ug;
    d_zeros(&ug, ng, 1);

    double *CN;
    d_zeros(&CN, ngN, nx);
    for (ii = 0; ii < ngN; ii++) CN[ii * (ngN + 1)] = 1.0;
    //    d_print_mat(ngN, nx, CN, ngN);
    double *lgN;
    d_zeros(&lgN, ngN, 1);  // force all states to 0 at the last stage
    double *ugN;
    d_zeros(&ugN, ngN, 1);  // force all states to 0 at the last stage

    /************************************************
    * cost function
    ************************************************/

    double *Q;
    d_zeros(&Q, nx, nx);
    for (ii = 0; ii < nx; ii++) Q[ii * (nx + 1)] = 1.0;

    double *R;
    d_zeros(&R, nu, nu);
    for (ii = 0; ii < nu; ii++) R[ii * (nu + 1)] = 2.0;

    double *S;
    d_zeros(&S, nu, nx);

    double *q;
    d_zeros(&q, nx, 1);
    for (ii = 0; ii < nx; ii++) q[ii] = 0.1;

    double *r;
    d_zeros(&r, nu, 1);
    for (ii = 0; ii < nu; ii++) r[ii] = 0.2;

#if defined(ELIMINATE_X0)
    // Q0 and q0 are matrices of size 0
    double *Q0;
    d_zeros(&Q0, 0, 0);
    double *q0;
    d_zeros(&q0, 0, 1);

    // compute r0 = r + S*x0
    double *r0;
    d_zeros(&r0, nu, 1);
    dcopy_3l(nu, r, 1, r0, 1);
    dgemv_n_3l(nu, nx, S, nu, x0, r0);

    // then S0 is a matrix of size nux0
    double *S0;
    d_zeros(&S0, nu, 0);
#endif

    /************************************************
    * problems data
    ************************************************/

    double *hA[N];
    double *hB[N];
    double *hb[N];
    double *hQ[N + 1];
    double *hS[N];
    double *hR[N];
    double *hq[N + 1];
    double *hr[N];
    double *hlb[N + 1];
    double *hub[N + 1];
    int *hidxb[N + 1];
    double *hC[N + 1];
    double *hD[N];
    double *hlg[N + 1];
    double *hug[N + 1];

#if defined(ELIMINATE_X0)
    hA[0] = A0;
    hb[0] = b0;
    hQ[0] = Q0;
    hS[0] = S0;
    hq[0] = q0;
    hr[0] = r0;
#else
    hA[0] = A;
    hb[0] = b;
    hQ[0] = Q;
    hS[0] = S;
    hq[0] = q;
    hr[0] = r;
#endif
    hB[0] = B;
    hR[0] = R;
    hlb[0] = lb0;
    hub[0] = ub0;
    hidxb[0] = idxb0;
    hC[0] = C;
    hD[0] = D;
    hlg[0] = lg;
    hug[0] = ug;
    for (ii = 1; ii < N; ii++) {
        hA[ii] = A;
        hB[ii] = B;
        hb[ii] = b;
        hQ[ii] = Q;
        hS[ii] = S;
        hR[ii] = R;
        hq[ii] = q;
        hr[ii] = r;
        hlb[ii] = lb1;
        hub[ii] = ub1;
        hidxb[ii] = idxb1;
        hC[ii] = C;
        hD[ii] = D;
        hlg[ii] = lg;
        hug[ii] = ug;
    }
    hQ[N] = Q;  // or maybe initialize to the solution of the DARE???
    hq[N] = q;  // or maybe initialize to the solution of the DARE???
    hlb[N] = lbN;
    hub[N] = ubN;
    hidxb[N] = idxbN;
    hC[N] = CN;
    hlg[N] = lgN;
    hug[N] = ugN;

    /************************************************
    * solution
    ************************************************/

    double *hx[N + 1];
    double *hu[N];
    double *hpi[N];
    double *hlam[N+1];
    double *ht[N+1];

    for (ii = 0; ii < N; ii++) {
        d_zeros(&hx[ii], nxx[ii], 1);
        d_zeros(&hu[ii], nuu[ii], 1);
        d_zeros(&hpi[ii], nxx[ii+1], 1);
        d_zeros(&hlam[ii], 2*nbb[ii]+2*ngg[ii], 1);
        d_zeros(&ht[ii], 2*nbb[ii]+2*ngg[ii], 1);
    }
    d_zeros(&hx[N], nxx[N], 1);
    d_zeros(&hlam[N], 2*nbb[N]+2*ngg[N], 1);
    d_zeros(&ht[N], 2*nbb[N]+2*ngg[N], 1);

    /************************************************
    * XXX initial guess
    ************************************************/

    double *hux_in[N+1];
    double *hlam_in[N+1];
    double *ht_in[N+1];

    for (ii = 0; ii <= N; ii++) {
        d_zeros(&hux_in[ii], nuu[ii]+nxx[ii], 1);
        d_zeros(&hlam_in[ii], 2*nbb[ii]+2*ngg[ii], 1);
        d_zeros(&ht_in[ii], 2*nbb[ii]+2*ngg[ii], 1);
    }

    /************************************************
    * create the in and out struct
    ************************************************/

    ocp_qp_in qp_in;
    qp_in.N = N;
    qp_in.nx = (const int *) nxx;
    qp_in.nu = (const int *) nuu;
    qp_in.nb = (const int *) nbb;
    qp_in.nc = (const int *) ngg;
    qp_in.A = (const double **) hA;
    qp_in.B = (const double **) hB;
    qp_in.b = (const double **) hb;
    qp_in.Q = (const double **) hQ;
    qp_in.S = (const double **) hS;
    qp_in.R = (const double **) hR;
    qp_in.q = (const double **) hq;
    qp_in.r = (const double **) hr;
    qp_in.idxb = (const int **) hidxb;
    qp_in.lb = (const double **) hlb;
    qp_in.ub = (const double **) hub;
    qp_in.Cx = (const double **) hC;
    qp_in.Cu = (const double **) hD;
    qp_in.lc = (const double **) hlg;
    qp_in.uc = (const double **) hug;

    ocp_qp_out qp_out;
    qp_out.x = hx;
    qp_out.u = hu;
    qp_out.pi = hpi;
    qp_out.lam = hlam;
    qp_out.t = ht;  // XXX why also the slack variables ???

    /************************************************
    * solver arguments
    ************************************************/

    ocp_qp_qpdunes_args *args = ocp_qp_qpdunes_create_arguments(QPDUNES_LINEAR_MPC);

    /************************************************
    * workspace
    ************************************************/

    ocp_qp_qpdunes_memory *mem = NULL;

    int_t work_space_size = ocp_qp_qpdunes_calculate_workspace_size(&qp_in, args);
    void *work = (void*)malloc(work_space_size);

    /************************************************
    * call the solver
    ************************************************/

    int return_value = 0;

    acados_timer timer;
    acados_tic(&timer);

//  nrep = 1;
    for (rep = 0; rep < nrep; rep++) {
        // NOTE(dimitris): creating memory again to avoid warm start
        mem = ocp_qp_qpdunes_create_memory(&qp_in, args);

        // call the QP OCP solver
        ocp_qp_qpdunes(&qp_in, &qp_out, args, mem, work);
    }

    real_t time = acados_toc(&timer)/nrep;

    if (return_value == ACADOS_SUCCESS)
        printf("\nACADOS status: solution found\n");

    if (return_value == ACADOS_MAXITER)
        printf("\nACADOS status: maximum number of iterations reached\n");

    if (return_value == ACADOS_MINSTEP)
        printf("\nACADOS status: below minimum step size length\n");

    printf("\nu = \n");
    for (ii = 0; ii < N; ii++) d_print_mat(1, nuu[ii], hu[ii], 1);

    printf("\nx = \n");
    for (ii = 0; ii <= N; ii++) d_print_mat(1, nxx[ii], hx[ii], 1);

    printf("\n");
    printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time);
    printf("\n\n");

    /************************************************
    * free memory
    ************************************************/

    d_free(A);
    d_free(B);
    d_free(b);
    d_free(x0);
    d_free(Q);
    d_free(S);
    d_free(R);
    d_free(q);
    d_free(r);
#if defined(ELIMINATE_X0)
    d_free(A0);
    d_free(b0);
    d_free(Q0);
    d_free(S0);
    d_free(q0);
    d_free(r0);
#endif
    int_free(idxb0);
    d_free(lb0);
    d_free(ub0);
    int_free(idxb1);
    d_free(lb1);
    d_free(ub1);
    int_free(idxbN);
    d_free(lbN);
    d_free(ubN);
    d_free(C);
    d_free(D);
    d_free(lg);
    d_free(ug);
    d_free(CN);
    d_free(lgN);
    d_free(ugN);

    for (ii = 0; ii < N; ii++) {
        d_free(hx[ii]);
        d_free(hu[ii]);
        d_free(hpi[ii]);
        d_free(hlam[ii]);
        d_free(ht[ii]);
    }
    d_free(hx[N]);
    d_free(hlam[N]);
    d_free(ht[N]);

    ocp_qp_qpdunes_free_memory(mem);
    free(work);

    return 0;
}
Example #3
0
// Simple SQP example for acados
int main() {
    int_t nil;
    int_t NMF_MAX = 4;  // data exist up to 9 masses
    int_t IMPL_MAX = 2;  // was originally 4, reduced to run the ctest faster

    for (int_t implicit = 0; implicit < IMPL_MAX; implicit++) {
        if (implicit == 0) {
            printf(
                "\n\n--------------------------------------------------------------------\n");
            printf(
                "------------------ Explicit Runge-Kutta of order 4 -----------------\n");
            printf(
                "--------------------------------------------------------------------\n");
        } else {
            printf(
                "\n\n--------------------------------------------------------------------\n");
            printf(
                "-------------- Lifted Implicit Runge-Kutta of order %d --------------\n",
                2 * implicit);
            printf(
                "--------------------------------------------------------------------\n");
        }

        for (int_t NMF = 1; NMF < NMF_MAX; NMF++) {
            printf("\n------------ NUMBER OF FREE MASSES =  %d ------------\n",
                   NMF);
            int_t NX = 6 * NMF;
            int_t NU = 3;
            int_t jj;

            // Problem data
            int_t N = NN;
            real_t *x0;
            real_t *w;  // states and controls stacked
            real_t *Q;
            real_t *R;
            real_t *xref;
            real_t *uref;
            int_t max_sqp_iters = 20;
            int_t max_iters = 1;
            real_t *x_end;
            real_t *u_end;
            FILE *initStates, *refStates;
            real_t feas, stepX, stepU;

            d_zeros(&x0, NX, 1);
            d_zeros(&xref, NX, 1);
            d_zeros(&uref, NU, 1);
            d_zeros(&x_end, NX, 1);
            d_zeros(&u_end, NU, 1);
            d_zeros(&w, NN * (NX + NU) + NX, 1);
            d_zeros(&Q, NX, NX);
            d_zeros(&R, NU, NU);

            switch (NMF) {
                case 1:
                    initStates = fopen(X0_NM2_FILE, "r");
                    break;
                case 2:
                    initStates = fopen(X0_NM3_FILE, "r");
                    break;
                case 3:
                    initStates = fopen(X0_NM4_FILE, "r");
                    break;
                case 4:
                    initStates = fopen(X0_NM5_FILE, "r");
                    break;
                case 5:
                    initStates = fopen(X0_NM6_FILE, "r");
                    break;
                case 6:
                    initStates = fopen(X0_NM7_FILE, "r");
                    break;
                case 7:
                    initStates = fopen(X0_NM8_FILE, "r");
                    break;
                default:
                    initStates = fopen(X0_NM9_FILE, "r");
                    break;
            }
            for (int_t i = 0; i < NX; i++) {
                nil = fscanf(initStates, "%lf", &x0[i]);
            }
            fclose(initStates);

            switch (NMF) {
                case 1:
                    refStates = fopen(XN_NM2_FILE, "r");
                    break;
                case 2:
                    refStates = fopen(XN_NM3_FILE, "r");
                    break;
                case 3:
                    refStates = fopen(XN_NM4_FILE, "r");
                    break;
                case 4:
                    refStates = fopen(XN_NM5_FILE, "r");
                    break;
                case 5:
                    refStates = fopen(XN_NM6_FILE, "r");
                    break;
                case 6:
                    refStates = fopen(XN_NM7_FILE, "r");
                    break;
                case 7:
                    refStates = fopen(XN_NM8_FILE, "r");
                    break;
                default:
                    refStates = fopen(XN_NM9_FILE, "r");
                    break;
            }
            for (int_t i = 0; i < NX; i++) {
                nil = fscanf(refStates, "%lf", &xref[i]);
            }
            fclose(refStates);

            for (int_t i = 0; i < NN; i++) {
                for (int_t j = 0; j < NX; j++) w[i * (NX + NU) + j] = xref[j];
            }
            for (int_t j = 0; j < NX; j++) w[NN * (NX + NU) + j] = xref[j];

            for (int_t i = 0; i < NX; i++) Q[i * (NX + 1)] = 1.0;
            for (int_t i = 0; i < NU; i++) R[i * (NU + 1)] = 0.01;

            // Integrator structs
            real_t T = 0.2;
            sim_in sim_in[NN];
            sim_out sim_out[NN];
            sim_info info[NN];

            sim_RK_opts rk_opts[NN];
            void *sim_work = NULL;
            sim_lifted_irk_memory irk_mem[NN];

            for (jj = 0; jj < NN; jj++) {
                sim_in[jj].num_steps = 2;
                sim_in[jj].step = T / sim_in[jj].num_steps;
                sim_in[jj].nx = NX;
                sim_in[jj].nu = NU;

                sim_in[jj].sens_forw = true;
                sim_in[jj].sens_adj = false;
                sim_in[jj].sens_hess = false;
                sim_in[jj].num_forw_sens = NX + NU;

                switch (NMF) {
                    case 1:
                        sim_in[jj].vde = &vde_chain_nm2;
                        sim_in[jj].forward_vde_wrapper = &vde_fun;
                        sim_in[jj].jac = &jac_chain_nm2;
                        sim_in[jj].jacobian_wrapper = &jac_fun;
                        break;
                    case 2:
                        sim_in[jj].vde = &vde_chain_nm3;
                        sim_in[jj].forward_vde_wrapper = &vde_fun;
                        sim_in[jj].jac = &jac_chain_nm2;
                        sim_in[jj].jacobian_wrapper = &jac_fun;
                        break;
                    case 3:
                        sim_in[jj].vde = &vde_chain_nm4;
                        sim_in[jj].forward_vde_wrapper = &vde_fun;
                        sim_in[jj].jac = &jac_chain_nm4;
                        sim_in[jj].jacobian_wrapper = &jac_fun;
                        break;
                    // case 4:
                    //     sim_in[jj].vde = &vde_chain_nm5;
                    //     sim_in[jj].VDE_forw = &vde_fun;
                    //     sim_in[jj].jac = &jac_chain_nm5;
                    //     sim_in[jj].jac_fun = &jac_fun;
                    //     break;
                    // case 5:
                    //     sim_in[jj].vde = &vde_chain_nm6;
                    //     sim_in[jj].VDE_forw = &vde_fun;
                    //     sim_in[jj].jac = &jac_chain_nm6;
                    //     sim_in[jj].jac_fun = &jac_fun;
                    //     break;
                    // case 6:
                    //     sim_in[jj].vde = &vde_chain_nm7;
                    //     sim_in[jj].VDE_forw = &vde_fun;
                    //     sim_in[jj].jac = &jac_chain_nm7;
                    //     sim_in[jj].jac_fun = &jac_fun;
                    //     break;
                    // case 7:
                    //     sim_in[jj].vde = &vde_chain_nm8;
                    //     sim_in[jj].VDE_forw = &vde_fun;
                    //     sim_in[jj].jac = &jac_chain_nm8;
                    //     sim_in[jj].jac_fun = &jac_fun;
                    //     break;
                    // default:
                    //     sim_in[jj].vde = &vde_chain_nm9;
                    //     sim_in[jj].VDE_forw = &vde_fun;
                    //     sim_in[jj].jac = &jac_chain_nm9;
                    //     sim_in[jj].jac_fun = &jac_fun;
                    //     break;
                }

                sim_in[jj].x = malloc(sizeof(*sim_in[jj].x) * (NX));
                sim_in[jj].u = malloc(sizeof(*sim_in[jj].u) * (NU));
                sim_in[jj].S_forw = malloc(sizeof(*sim_in[jj].S_forw) * (NX * (NX + NU)));

                for (int_t i = 0; i < NX * (NX + NU); i++)
                    sim_in[jj].S_forw[i] = 0.0;
                for (int_t i = 0; i < NX; i++)
                    sim_in[jj].S_forw[i * (NX + 1)] = 1.0;

                sim_out[jj].xn = malloc(sizeof(*sim_out[jj].xn) * (NX));
                sim_out[jj].S_forw = malloc(sizeof(*sim_out[jj].S_forw) * (NX * (NX + NU)));
                sim_out[jj].info = &info[jj];

                int_t workspace_size;
                if (implicit > 0) {
                    sim_irk_create_arguments(&rk_opts[jj], implicit, "Gauss");
                    workspace_size =
                        sim_lifted_irk_calculate_workspace_size(&sim_in[jj], &rk_opts[jj]);
                    sim_lifted_irk_create_memory(&sim_in[jj], &rk_opts[jj], &irk_mem[jj]);
                } else {
                    sim_erk_create_arguments(&rk_opts[jj], 4);
                    workspace_size =
                        sim_erk_calculate_workspace_size(&sim_in[jj], &rk_opts[jj]);
                }
                if (jj == 0) sim_work = (void *)malloc(workspace_size);
            }

            int_t nx[NN + 1] = {0};
            int_t nu[NN + 1] = {0};
            int_t nb[NN + 1] = {0};
            int_t nc[NN + 1] = {0};

            nx[0] = NX;
            nu[0] = NU;
            nb[0] = NX + NU;
            nc[0] = 0;
            for (int_t i = 1; i < N; i++) {
                nx[i] = NX;
                nu[i] = NU;
                nb[i] = NU;
                nc[i] = 0;
            }
            nx[N] = NX;
            nu[N] = 0;
            nb[N] = 0;
            nc[N] = 0;

            /************************************************
             * box constraints
             ************************************************/

            int *idxb0;
            int_zeros(&idxb0, NX + NU, 1);
            real_t *lb0;
            d_zeros(&lb0, NX + NU, 1);
            real_t *ub0;
            d_zeros(&ub0, NX + NU, 1);
            for (int_t j = 0; j < NX; j++) {
                lb0[j] = x0[j];  //   xmin
                ub0[j] = x0[j];  //   xmax
                idxb0[j] = j;
            }
            for (int_t j = 0; j < NU; j++) {
                lb0[j + NX] = -UMAX;  //   umin
                ub0[j + NX] = UMAX;   //   umax
                idxb0[j + NX] = j + NX;
            }

            int_t *idxb1;
            int_zeros(&idxb1, NU, 1);
            real_t *lb1[N - 1];
            real_t *ub1[N - 1];
            for (int_t i = 0; i < N - 1; i++) {
                d_zeros(&lb1[i], NU, 1);
                d_zeros(&ub1[i], NU, 1);
                for (int_t j = 0; j < NU; j++) {
                    lb1[i][j] = -UMAX;  //   umin
                    ub1[i][j] = UMAX;   //   umax
                    idxb1[j] = j + NX;
                }
            }

            real_t *pA[N];
            real_t *pB[N];
            real_t *pb[N];
            real_t *pQ[N + 1];
            real_t *pS[N];
            real_t *pR[N];
            real_t *pq[N + 1];
            real_t *pr[N];
            real_t *px[N + 1];
            real_t *pu[N];
            real_t *ppi[N];
            real_t *plam[N + 1];
            real_t *pCx[N + 1];
            real_t *pCu[N];
            real_t *plc[N + 1];
            real_t *puc[N + 1];

            for (int_t i = 0; i < N; i++) {
                d_zeros(&pA[i], nx[i + 1], nx[i]);
                d_zeros(&pB[i], nx[i + 1], nu[i]);
                d_zeros(&pb[i], nx[i + 1], 1);
                d_zeros(&pS[i], nu[i], nx[i]);
                d_zeros(&pq[i], nx[i], 1);
                d_zeros(&pr[i], nu[i], 1);
                d_zeros(&px[i], nx[i], 1);
                d_zeros(&pu[i], nu[i], 1);
                d_zeros(&ppi[i], nx[i + 1], 1);
                d_zeros(&plam[i], 2*nb[i] + 2*nc[i], 1);
                d_zeros(&pCx[i], nc[i], nx[i]);
                d_zeros(&pCu[i], nc[i], nu[i]);
                d_zeros(&plc[i], nc[i], 1);
                d_zeros(&puc[i], nc[i], 1);
            }
            d_zeros(&pq[N], nx[N], 1);
            d_zeros(&px[N], nx[N], 1);
            d_zeros(&pCx[N], nc[N], nx[N]);
            d_zeros(&plc[N], nc[N], 1);
            d_zeros(&puc[N], nc[N], 1);

            d_zeros(&plam[N], 2*nb[N] + 2*nc[N], 1);

            real_t *hlb[N + 1];
            real_t *hub[N + 1];
            int *hidxb[N + 1];

            hlb[0] = lb0;
            hub[0] = ub0;
            hidxb[0] = idxb0;
            for (int_t i = 1; i < N; i++) {
                hlb[i] = lb1[i - 1];
                hub[i] = ub1[i - 1];
                hidxb[i] = idxb1;
            }

            // Allocate OCP QP variables
            ocp_qp_in qp_in;
            qp_in.N = N;
            ocp_qp_out qp_out;

            qp_in.nx = nx;
            qp_in.nu = nu;
            qp_in.nb = nb;
            qp_in.nc = nc;
            for (int_t i = 0; i < N; i++) {
                pQ[i] = Q;
                pR[i] = R;
            }
            pQ[N] = Q;
            qp_in.Q = (const real_t **)pQ;
            qp_in.S = (const real_t **)pS;
            qp_in.R = (const real_t **)pR;
            qp_in.q = (const real_t **)pq;
            qp_in.r = (const real_t **)pr;
            qp_in.A = (const real_t **)pA;
            qp_in.B = (const real_t **)pB;
            qp_in.b = (const real_t **)pb;
            // TODO(dimitris): rename all to p
            qp_in.lb = (const real_t **)hlb;
            qp_in.ub = (const real_t **)hub;
            qp_in.idxb = (const int **)hidxb;
            qp_in.Cx = (const real_t **)pCx;
            qp_in.Cu = (const real_t **)pCu;
            qp_in.lc = (const real_t **)plc;
            qp_in.uc = (const real_t **)puc;;
            qp_out.x = px;
            qp_out.u = pu;
            qp_out.pi = ppi;
            qp_out.lam = plam;

            // Initialize solver
            #ifdef USE_QPOASES
            ocp_qp_condensing_qpoases_args *qpsolver_args =
                ocp_qp_condensing_qpoases_create_arguments(&qp_in);
            ocp_qp_condensing_qpoases_memory *qpsolver_memory =
                ocp_qp_condensing_qpoases_create_memory(&qp_in, qpsolver_args);

            int_t qpsolver_workspace_size =
                ocp_qp_condensing_qpoases_calculate_workspace_size(&qp_in, qpsolver_args);
            int_t qpsolver_memory_size =
                ocp_qp_condensing_qpoases_calculate_memory_size(&qp_in, qpsolver_args);
            #else
            ocp_qp_condensing_hpipm_args *qpsolver_args =
                ocp_qp_condensing_hpipm_create_arguments(qp_in);
            ocp_qp_condensing_hpipm_memory qpsolver_memory;

            qpsolver_args->mu_max = 1e-8;
            qpsolver_args->iter_max = 20;
            qpsolver_args->alpha_min = 1e-8;
            qpsolver_args->mu0 = 1.0;

            int_t qpsolver_workspace_size =
                ocp_qp_condensing_hpipm_calculate_workspace_size(&qp_in, qpsolver_args);
            int_t qpsolver_memory_size =
                ocp_qp_condensing_hpipm_calculate_memory_size(&qp_in, qpsolver_args);
            #endif

            void *qpsolver_work = calloc(qpsolver_workspace_size, sizeof(char));

            #ifdef USE_QPOASES
            (void) qpsolver_memory_size;
            #else
            ocp_qp_condensing_hpipm_assign_memory(&qp_in, qpsolver_args, &qpsolver_memory,
                qpsolver_mem);
            #endif

            acados_timer timer;
            real_t timings = 0;
            real_t timings_sim = 0;
            real_t timings_la = 0;
            real_t timings_ad = 0;
            //    for (int_t iter = 0; iter < max_iters; iter++) {
            //        printf("\n------ TIME STEP %d ------\n", iter);

            acados_tic(&timer);
            for (int_t sqp_iter = 0; sqp_iter < max_sqp_iters; sqp_iter++) {
                feas = -1e10;
                stepX = -1e10;
                stepU = -1e10;

#if PARALLEL
#pragma omp parallel for
#endif

                for (int_t i = 0; i < N; i++) {
                    // Pass state and control to integrator
                    for (int_t j = 0; j < NX; j++)
                        sim_in[i].x[j] = w[i * (NX + NU) + j];
                    for (int_t j = 0; j < NU; j++)
                        sim_in[i].u[j] = w[i * (NX + NU) + NX + j];
                    if (implicit > 0) {
                        sim_lifted_irk(&sim_in[i], &sim_out[i], &rk_opts[i],
                                       &irk_mem[i], sim_work);
                    } else {
                        sim_erk(&sim_in[i], &sim_out[i], &rk_opts[i], 0,
                                sim_work);
                    }

                    for (int_t j = 0; j < NX; j++) {
                        pb[i][j] = sim_out[i].xn[j] - w[(i + 1) * (NX + NU) + j];
                        if (fabs(pb[i][j]) > feas) feas = fabs(pb[i][j]);
                        for (int_t k = 0; k < NX; k++)
                            pA[i][j * NX + k] = sim_out[i].S_forw[j * NX + k];  // COLUMN MAJOR
                                                                                // FROM CASADI
                    }
                    for (int_t j = 0; j < NU; j++)
                        for (int_t k = 0; k < NX; k++)
                            pB[i][j * NX + k] = sim_out[i].S_forw[(NX + j) * NX + k];

                    timings_sim += sim_out[i].info->CPUtime;
                    timings_la += sim_out[i].info->LAtime;
                    timings_ad += sim_out[i].info->ADtime;
                }
                for (int_t i = 0; i < N; i++) {
                    // Update bounds:
                    if (i == 0) {
                        for (int_t j = 0; j < NU; j++) {
                            lb0[NX + j] = -UMAX - w[NX + j];
                            ub0[NX + j] = UMAX - w[NX + j];
                        }
                    } else {
                        for (int_t j = 0; j < NU; j++) {
                            lb1[i - 1][j] = -UMAX - w[i * (NX + NU) + NX + j];
                            ub1[i - 1][j] = UMAX - w[i * (NX + NU) + NX + j];
                        }
                    }

                    // Construct QP matrices:
                    for (int_t j = 0; j < NX; j++)
                        pq[i][j] = Q[j * (NX + 1)] * (w[i * (NX + NU) + j] - xref[j]);

                    for (int_t j = 0; j < NU; j++)
                        pr[i][j] = R[j * (NU + 1)] * (w[i * (NX + NU) + NX + j] - uref[j]);

                }
                for (int_t j = 0; j < NX; j++) {
                    lb0[j] = (x0[j] - w[j]);
                    ub0[j] = (x0[j] - w[j]);
                }

                for (int_t j = 0; j < NX; j++)
                    pq[N][j] = Q[j * (NX + 1)] * (w[N * (NX + NU) + j] - xref[j]);

                // Set updated bounds:
                hlb[0] = lb0;
                hub[0] = ub0;
                for (int_t i = 1; i < N; i++) {
                    hlb[i] = lb1[i - 1];
                    hub[i] = ub1[i - 1];
                }

                int status = 0;

                #ifdef USE_QPOASES
                status = ocp_qp_condensing_qpoases(&qp_in, &qp_out, qpsolver_args,
                    qpsolver_memory, qpsolver_work);
                #else
                status = ocp_qp_condensing_hpipm(&qp_in, &qp_out, qpsolver_args,
                    &qpsolver_memory, qpsolver_work);
                #endif

                if (status) {
                    printf("qpOASES returned error status %d\n", status);
                    return -1;
                }
                for (int_t i = 0; i < N; i++) {
                    for (int_t j = 0; j < NX; j++) {
                        w[i * (NX + NU) + j] += qp_out.x[i][j];
                        if (fabs(qp_out.x[i][j]) > stepX)
                            stepX = fabs(qp_out.x[i][j]);
                    }
                    for (int_t j = 0; j < NU; j++) {
                        w[i * (NX + NU) + NX + j] += qp_out.u[i][j];
                        if (fabs(qp_out.u[i][j]) > stepU)
                            stepU = fabs(qp_out.u[i][j]);
                    }
                }
                for (int_t j = 0; j < NX; j++) {
                    w[N * (NX + NU) + j] += qp_out.x[N][j];
                    if (fabs(qp_out.x[N][j]) > stepX)
                        stepX = fabs(qp_out.x[N][j]);
                }

                if (sqp_iter == max_sqp_iters - 1) {
                    fprintf(stdout,
                            "--- ITERATION %d, Infeasibility: %+.3e , step X: "
                            "%+.3e, "
                            "step U: %+.3e \n",
                            sqp_iter, feas, stepX, stepU);
                }
            }
            //        for (int_t i = 0; i < NX; i++) x0[i] = w[NX+NU+i];
            //        shift_states(w, x_end, N);
            //        shift_controls(w, u_end, N);
            timings += acados_toc(&timer);
//    }

            printf("\nAverage of %.3f ms in the integrator,\n",
                   1e3 * timings_sim / (max_sqp_iters * max_iters));
            printf("  of which %.3f ms spent in CasADi and\n",
                   1e3 * timings_ad / (max_sqp_iters * max_iters));
            printf("  of which %.3f ms spent in BLASFEO.\n",
                   1e3 * timings_la / (max_sqp_iters * max_iters));
            printf("--Total of %.3f ms per SQP iteration.--\n",
                   1e3 * timings / (max_sqp_iters * max_iters));

            //    #ifdef DEBUG
            //    print_matrix_name("stdout", "sol", w, NX+NU, N);
            //    #endif  // DEBUG

            // TODO(dimitris): this program is leaking memory
        }
    }
    return 0 * nil;
}
int sim_lifted_irk(void *config_, sim_in *in, sim_out *out, void *opts_, void *mem_,
                       void *work_)
{
    // typecasting
    sim_config *config = config_;
    sim_opts *opts = opts_;
    sim_lifted_irk_memory *memory = (sim_lifted_irk_memory *) mem_;

    void *dims_ = in->dims;
    sim_lifted_irk_dims *dims = (sim_lifted_irk_dims *) dims_;

    sim_lifted_irk_workspace *workspace =
        (sim_lifted_irk_workspace *) sim_lifted_irk_cast_workspace(config, dims, opts,
                                                                           work_);

    int nx = dims->nx;
    int nu = dims->nu;
    int nz = dims->nz;

    int ns = opts->ns;

    if ( opts->ns != opts->tableau_size )
    {
        printf("Error in sim_lifted_irk: the Butcher tableau size does not match ns");
        return ACADOS_FAILURE;
    }
    // assert - only use supported features
    if (nz != 0)
    {
        printf("nz should be zero - DAEs are not supported by the lifted IRK integrator");
        return ACADOS_FAILURE;
    }
    if (opts->output_z)
    {
        printf("opts->output_z should be false - DAEs are not supported for the lifted IRK integrator");
        return ACADOS_FAILURE;
    }
    if (opts->sens_algebraic)
    {
        printf("opts->sens_algebraic should be false - DAEs are not supported for the lifted IRK integrator");
        return ACADOS_FAILURE;
    }

    int ii, jj, ss;
    double a;


    double *x = in->x;
    double *u = in->u;
    double *S_forw_in = in->S_forw;

    // int newton_iter = opts->newton_iter; // not used; always 1 in lifted

    double *A_mat = opts->A_mat;
    double *b_vec = opts->b_vec;
    int num_steps = opts->num_steps;

    double step = in->T / num_steps;
    // TODO(FreyJo): this should be an option!
    int update_sens = memory->update_sens;

    int *ipiv = workspace->ipiv;
    struct blasfeo_dmat *JGK = memory->JGK;
    struct blasfeo_dmat *S_forw = memory->S_forw;

    struct blasfeo_dmat *J_temp_x = workspace->J_temp_x;
    struct blasfeo_dmat *J_temp_xdot = workspace->J_temp_xdot;
    struct blasfeo_dmat *J_temp_u = workspace->J_temp_u;

    struct blasfeo_dvec *rG = workspace->rG;
    struct blasfeo_dvec *K = memory->K;
    struct blasfeo_dmat *JGf = memory->JGf;
    struct blasfeo_dmat *JKf = memory->JKf;
    struct blasfeo_dvec *xt = workspace->xt;
    struct blasfeo_dvec *xn = workspace->xn;
    struct blasfeo_dvec *xn_out = workspace->xn_out;
    struct blasfeo_dvec *dxn = workspace->dxn;

    struct blasfeo_dvec *w = workspace->w;

    double *x_out = out->xn;
    double *S_forw_out = out->S_forw;

    struct blasfeo_dvec_args ext_fun_in_K;

    ext_fun_arg_t ext_fun_type_in[3];
    void *ext_fun_in[3];

    struct blasfeo_dvec_args ext_fun_out_rG;
    ext_fun_arg_t ext_fun_type_out[5];
    void *ext_fun_out[5];

    lifted_irk_model *model = in->model;

    acados_timer timer, timer_ad;
    double timing_ad = 0.0;

    if (opts->sens_adj)
    {
        printf("LIFTED_IRK with ADJOINT SENSITIVITIES - NOT IMPLEMENTED YET - EXITING.");
        exit(1);
    }


    blasfeo_dgese(nx, nx, 0.0, J_temp_x, 0, 0);
    blasfeo_dgese(nx, nx, 0.0, J_temp_xdot, 0, 0);
    blasfeo_dgese(nx, nu, 0.0, J_temp_x, 0, 0);

    blasfeo_dvecse(nx * ns, 0.0, rG, 0);

    // TODO(dimitris): shouldn't this be NF instead of nx+nu??
    if (update_sens) blasfeo_pack_dmat(nx, nx + nu, S_forw_in, nx, S_forw, 0, 0);

    blasfeo_dvecse(nx * ns, 0.0, rG, 0);
    blasfeo_pack_dvec(nx, x, xn, 0);
    blasfeo_pack_dvec(nx, x, xn_out, 0);
    blasfeo_dvecse(nx, 0.0, dxn, 0);


    // start the loop
    acados_tic(&timer);
    for (ss = 0; ss < num_steps; ss++)
    {
        // initialize
        blasfeo_dgese(nx * ns, nx * ns, 0.0, JGK, 0, 0);
        blasfeo_dgese(nx * ns, nx + nu, 0.0, JGf, 0, 0);

        // expansion step (K variables)
        // compute x and u step
        blasfeo_pack_dvec(nx, in->x, w, 0);
        blasfeo_pack_dvec(nu, in->u, w, nx);

        blasfeo_daxpy(nx, -1.0, memory->x, 0, w, 0, w, 0);
        blasfeo_daxpy(nu, -1.0, memory->u, 0, w, nx, w, nx);
        blasfeo_dgemv_n(nx * ns, nx + nu, 1.0, &JKf[ss], 0, 0, w, 0, 1.0, &K[ss], 0, &K[ss], 0);

        blasfeo_pack_dvec(nx, in->x, memory->x, 0);
        blasfeo_pack_dvec(nu, in->u, memory->u, 0);

        // reset value of JKf
        blasfeo_dgese(nx * ns, nx + nu, 0.0, &JKf[ss], 0, 0);

        for (ii = 0; ii < ns; ii++)  // ii-th row of tableau
        {
            // take x(n); copy a strvec into a strvec
            blasfeo_dveccp(nx, xn, 0, xt, 0);

            for (jj = 0; jj < ns; jj++)
            {  // jj-th col of tableau
                a = A_mat[ii + ns * jj];
                if (a != 0)
                {
                    // xt = xt + T_int * a[i,j]*K_j
                    a *= step;
                    blasfeo_daxpy(nx, a, &K[ss], jj * nx, xt, 0, xt, 0);
                }
            }

            if (!update_sens)
            {
                // compute the residual of implicit ode at time t_ii, store value in rGt
                acados_tic(&timer_ad);

                ext_fun_type_in[0] = BLASFEO_DVEC;
                ext_fun_in[0] = xt;  // x: nx
                ext_fun_type_in[1] = BLASFEO_DVEC_ARGS;
                ext_fun_in_K.xi = ii * nx;
                ext_fun_in_K.x = &K[ss];
                ext_fun_in[1] = &ext_fun_in_K;  // K[ii*nx]: nx
                ext_fun_type_in[2] = COLMAJ;
                ext_fun_in[2] = u;  // u: nu

                ext_fun_type_out[0] = BLASFEO_DVEC_ARGS;
                ext_fun_out_rG.x = rG;
                ext_fun_out_rG.xi = ii * nx;
                ext_fun_out[0] = &ext_fun_out_rG;  // fun: nx

                model->impl_ode_fun->evaluate(model->impl_ode_fun, ext_fun_type_in, ext_fun_in,
                                              ext_fun_type_out, ext_fun_out);

                timing_ad += acados_toc(&timer_ad);
            }
            else
            {
                // compute the jacobian of implicit ode
                acados_tic(&timer_ad);

                ext_fun_type_in[0] = BLASFEO_DVEC;
                ext_fun_in[0] = xt;  // x: nx
                ext_fun_type_in[1] = BLASFEO_DVEC_ARGS;
                ext_fun_in_K.xi = ii * nx;  // K[ii*nx]: nx
                ext_fun_in_K.x = &K[ss];
                ext_fun_in[1] = &ext_fun_in_K;
                ext_fun_type_in[2] = COLMAJ;
                ext_fun_in[2] = u;  // u: nu

                ext_fun_type_out[0] = BLASFEO_DVEC_ARGS;
                ext_fun_out_rG.x = rG;
                ext_fun_out_rG.xi = ii * nx;
                ext_fun_out[0] = &ext_fun_out_rG;  // fun: nx
                ext_fun_type_out[1] = BLASFEO_DMAT;
                ext_fun_out[1] = J_temp_x;  // jac_x: nx*nx
                ext_fun_type_out[2] = BLASFEO_DMAT;
                ext_fun_out[2] = J_temp_xdot;  // jac_xdot: nx*nx
                ext_fun_type_out[3] = BLASFEO_DMAT;
                ext_fun_out[3] = J_temp_u;  // jac_u: nx*nu

                model->impl_ode_fun_jac_x_xdot_u->evaluate(model->impl_ode_fun_jac_x_xdot_u,
                                                           ext_fun_type_in, ext_fun_in,
                                                           ext_fun_type_out, ext_fun_out);

                timing_ad += acados_toc(&timer_ad);

                blasfeo_dgecp(nx, nx, J_temp_x, 0, 0, JGf, ii * nx, 0);
                blasfeo_dgecp(nx, nu, J_temp_u, 0, 0, JGf, ii * nx, nx);

                for (jj = 0; jj < ns; jj++)
                {
                    // compute the block (ii,jj)th block of JGK
                    a = A_mat[ii + ns * jj];
                    if (a != 0)
                    {
                        a *= step;
                        blasfeo_dgead(nx, nx, a, J_temp_x, 0, 0, JGK, ii * nx, jj * nx);
                    }
                    if (jj == ii)
                    {
                        blasfeo_dgead(nx, nx, 1, J_temp_xdot, 0, 0, JGK, ii * nx, jj * nx);
                    }
                }  // end jj
            }
        }  // end ii

        // obtain x(n+1) before updating K(n)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, step * b_vec[ii], &K[ss], ii * nx, xn, 0, xn, 0);

        // DGETRF computes an LU factorization of a general M-by-N matrix A
        // using partial pivoting with row interchanges.

        if (update_sens)
        {
            blasfeo_dgetrf_rp(nx * ns, nx * ns, JGK, 0, 0, JGK, 0, 0, ipiv);
        }

        // update r.h.s (6.23, Quirynen2017)
        blasfeo_dgemv_n(nx * ns, nx, 1.0, JGf, 0, 0, dxn, 0, 1.0, rG, 0, rG, 0);


        // permute also the r.h.s
        blasfeo_dvecpe(nx * ns, ipiv, rG, 0);

        // solve JGK * y = rG, JGK on the (l)eft, (l)ower-trian, (n)o-trans
        //                    (u)nit trian
        blasfeo_dtrsv_lnu(nx * ns, JGK, 0, 0, rG, 0, rG, 0);

        // solve JGK * x = rG, JGK on the (l)eft, (u)pper-trian, (n)o-trans
        //                    (n)o unit trian , and store x in rG
        blasfeo_dtrsv_unn(nx * ns, JGK, 0, 0, rG, 0, rG, 0);


        // scale and add a generic strmat into a generic strmat // K = K - rG, where rG is DeltaK
        blasfeo_daxpy(nx * ns, -1.0, rG, 0, &K[ss], 0, &K[ss], 0);

        // obtain dx(n)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, -step * b_vec[ii], rG, ii * nx, dxn, 0, dxn, 0);

        // update JKf
        blasfeo_dgemm_nn(nx * ns, nx + nu, nx, 1.0, JGf, 0, 0, S_forw, 0, 0, 0.0, &JKf[ss], 0, 0,
                         &JKf[ss], 0, 0);

        blasfeo_dgead(nx * ns, nu, 1.0, JGf, 0, nx, &JKf[ss], 0, nx);

        blasfeo_drowpe(nx * ns, ipiv, &JKf[ss]);
        blasfeo_dtrsm_llnu(nx * ns, nx + nu, 1.0, JGK, 0, 0, &JKf[ss], 0, 0, &JKf[ss], 0, 0);
        blasfeo_dtrsm_lunn(nx * ns, nx + nu, 1.0, JGK, 0, 0, &JKf[ss], 0, 0, &JKf[ss], 0, 0);

        // update forward sensitivity
        for (jj = 0; jj < ns; jj++)
            blasfeo_dgead(nx, nx + nu, -step * b_vec[jj], &JKf[ss], jj * nx, 0, S_forw, 0, 0);

        // obtain x(n+1)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, step * b_vec[ii], &K[ss], ii * nx, xn_out, 0, xn_out, 0);

    }  // end int step ss


    // extract output
    blasfeo_unpack_dvec(nx, xn_out, 0, x_out);

    blasfeo_unpack_dmat(nx, nx + nu, S_forw, 0, 0, S_forw_out, nx);

    out->info->CPUtime = acados_toc(&timer);
    out->info->LAtime = 0.0;
    out->info->ADtime = timing_ad;

    return 0;
}