Пример #1
0
/*
 * Copy user-supplied values into the stage-wise primal vector of an NLP output.
 *
 * out->ux[stage] stores the controls first (offset 0, dims->nu[stage] entries)
 * followed by the states (offset dims->nu[stage], dims->nx[stage] entries).
 *
 * config : not used by this setter.
 * dims   : supplies nx[stage] and nu[stage].
 * out    : destination; only out->ux[stage] is written.
 * stage  : index into the per-stage arrays.
 * field  : "x" to set the states, "u" to set the controls.
 * value  : interpreted as an array of doubles of the matching length.
 *
 * Any other field name prints an error message and terminates the process
 * with exit(1).
 */
void ocp_nlp_out_set(ocp_nlp_config *config, ocp_nlp_dims *dims, ocp_nlp_out *out,
                     int stage, const char *field, void *value)
{
    const int wants_states = (strcmp(field, "x") == 0);
    const int wants_controls = !wants_states && (strcmp(field, "u") == 0);

    // guard: reject unknown fields before touching dims or out
    if (!wants_states && !wants_controls)
    {
        printf("\nerror: ocp_nlp_out_set: field %s not available\n", field);
        exit(1);
    }

    double *src = value;
    int count = wants_states ? dims->nx[stage] : dims->nu[stage];
    // states live behind the controls inside ux
    int offset = wants_states ? dims->nu[stage] : 0;

    blasfeo_pack_dvec(count, src, &out->ux[stage], offset);
}
Пример #2
0
/*
 * mdlStart - build the acados NLP solver and stash it in the S-function's
 * pointer work vector.
 *
 * Steps performed, in order:
 *   1. fill the per-stage dimension arrays (the last stage has no controls and
 *      no algebraic variables);
 *   2. create the solver plan (SQP_GN, NONLINEAR_LS cost, PARTIAL_CONDENSING_HPIPM,
 *      CONTINUOUS_MODEL dynamics integrated with IRK, BGH constraints) and the
 *      matching config;
 *   3. wire up the CasADi function wrappers (impl_dae_fun, ..., nls_cost_N_residual),
 *      which are not declared in this function and are presumably file-scope objects;
 *   4. create dims, nlp_in, cost, dynamics, bounds, options, nlp_out and the solver;
 *   5. write the initial guess and store dims, nlp_in, nlp_out, nlp_opts, solver
 *      and config in PWork slots 0..5.
 *
 * The literal arrays below (x0, u, y_ref, the W diagonal, lb/ub) are sized for
 * NUM_STATES == 4 and NUM_CONTROLS == 2.
 *
 * Errors from ocp_nlp_dynamics_model_set terminate the process with exit(1).
 *
 * NOTE(review): `plan` is neither stored in PWork nor released here - confirm
 * who owns it.
 */
static void mdlStart(SimStruct *S)
{
    int i, j;

    int nx[NUM_STAGES+1], nu[NUM_STAGES+1], nz[NUM_STAGES+1], ny[NUM_STAGES+1], nb[NUM_STAGES+1], nbx[NUM_STAGES+1], nbu[NUM_STAGES+1], ng[NUM_STAGES+1], nh[NUM_STAGES+1], nq[NUM_STAGES+1], ns[NUM_STAGES+1];
    for (i = 0; i <= NUM_STAGES; ++i)
    {
        nx[i] = NUM_STATES;
        nu[i] = NUM_CONTROLS;
        nz[i] = 2;
        ny[i] = 1 + nx[i] + nu[i];
        nbx[i] = nx[i];
        nbu[i] = nu[i];
        nb[i] = nbx[i] + nbu[i];
        ng[i] = 0;
        nh[i] = 0;
        nq[i] = 0;
        ns[i] = 0;
    }

    // terminal stage: no controls, no algebraic variables
    nu[NUM_STAGES] = 0;
    nz[NUM_STAGES] = 0;
    ny[NUM_STAGES] = 1 + nx[NUM_STAGES];
    nbu[NUM_STAGES] = 0;
    nb[NUM_STAGES] = nbx[NUM_STAGES] + nbu[NUM_STAGES];

    // every entry of [u; x] is box-constrained, so the bound index map is the identity
    int idxb[nbu[0]+nbx[0]];
    for (i = 0; i < nbu[0]+nbx[0]; ++i)
        idxb[i] = i;

    // sampling time (s)
    double T = 0.05;

    // x: u1, u2, xD1, xD2
    double x0[] = {50, 50, 1.14275, 1.53787};
    // z: xA1, xA2
    // static storage: nlp_out->z is pointed at z0_dvec below and must outlive this call
    static double z0[] = {1.28976, 1.78264};
    static struct blasfeo_dvec z0_dvec;
    blasfeo_allocate_dvec(2, &z0_dvec);
    blasfeo_pack_dvec(2, z0, &z0_dvec, 0);
    // u: u1_r, u2_r
    double u[] = {0, 0};

    // reference
    double y_ref[] = {1.500, 50, 50, 1.14275, 1.53787, 0, 0};

    // weighting matrices (dense, only the diagonal is non-zero)
    double W[ny[0]*ny[0]];
    for (i = 0; i < ny[0]*ny[0]; ++i)
        W[i] = 0;
    W[0*(ny[0]+1)] = 1000;
    W[1*(ny[0]+1)] = 1e-3;
    W[2*(ny[0]+1)] = 1e-3;
    W[3*(ny[0]+1)] = 1e-3;
    W[4*(ny[0]+1)] = 1e-3;
    W[5*(ny[0]+1)] = 0.1e-3;
    W[6*(ny[0]+1)] = 0.1e-3;

    double W_N[ny[NUM_STAGES]*ny[NUM_STAGES]];
    for (i = 0; i < ny[NUM_STAGES]*ny[NUM_STAGES]; ++i)
        W_N[i] = 0;
    W_N[0*(ny[NUM_STAGES]+1)] = 1000;
    W_N[1*(ny[NUM_STAGES]+1)] = 1e-3;
    W_N[2*(ny[NUM_STAGES]+1)] = 1e-3;
    W_N[3*(ny[NUM_STAGES]+1)] = 1e-3;
    W_N[4*(ny[NUM_STAGES]+1)] = 1e-3;

    // stage 0: lower and upper state bounds coincide with x0 (initial state is pinned)
    double lb_0[] = {-10000, -10000, 50, 50, 1.14275, 1.53787};
    double ub_0[] = {+10000, +10000, 50, 50, 1.14275, 1.53787};

    double lb[] = {-10000, -10000, 0, 0, 0.5, 0.5};
    double ub[] = {+10000, +10000, 100, 100, 1.757, 2.125};

    double lb_N[] = {0, 0, 0.5, 0.5};
    double ub_N[] = {100, 100, 1.757, 2.125};

    // solver plan
    ocp_nlp_plan *plan = ocp_nlp_plan_create(NUM_STAGES);

    plan->nlp_solver = SQP_GN;

    for (i = 0; i <= NUM_STAGES; i++)
        plan->nlp_cost[i] = NONLINEAR_LS;

    plan->ocp_qp_solver_plan.qp_solver = PARTIAL_CONDENSING_HPIPM;

    for (i = 0; i < NUM_STAGES; i++)
    {
        plan->nlp_dynamics[i] = CONTINUOUS_MODEL;
        plan->sim_solver_plan[i].sim_solver = IRK;
    }

    for (i = 0; i <= NUM_STAGES; i++)
        plan->nlp_constraints[i] = BGH;

    ocp_nlp_config *config = ocp_nlp_config_create(*plan);

    // implicit dae
    impl_dae_fun.casadi_fun = &engine_impl_dae_fun;
    impl_dae_fun.casadi_work = &engine_impl_dae_fun_work;
    impl_dae_fun.casadi_sparsity_in = &engine_impl_dae_fun_sparsity_in;
    impl_dae_fun.casadi_sparsity_out = &engine_impl_dae_fun_sparsity_out;
    impl_dae_fun.casadi_n_in = &engine_impl_dae_fun_n_in;
    impl_dae_fun.casadi_n_out = &engine_impl_dae_fun_n_out;
    external_function_casadi_create(&impl_dae_fun);

    impl_dae_fun_jac_x_xdot_z.casadi_fun = &engine_impl_dae_fun_jac_x_xdot_z;
    impl_dae_fun_jac_x_xdot_z.casadi_work = &engine_impl_dae_fun_jac_x_xdot_z_work;
    impl_dae_fun_jac_x_xdot_z.casadi_sparsity_in = &engine_impl_dae_fun_jac_x_xdot_z_sparsity_in;
    impl_dae_fun_jac_x_xdot_z.casadi_sparsity_out = &engine_impl_dae_fun_jac_x_xdot_z_sparsity_out;
    impl_dae_fun_jac_x_xdot_z.casadi_n_in = &engine_impl_dae_fun_jac_x_xdot_z_n_in;
    impl_dae_fun_jac_x_xdot_z.casadi_n_out = &engine_impl_dae_fun_jac_x_xdot_z_n_out;
    external_function_casadi_create(&impl_dae_fun_jac_x_xdot_z);

    impl_dae_jac_x_xdot_u_z.casadi_fun = &engine_impl_dae_jac_x_xdot_u_z;
    impl_dae_jac_x_xdot_u_z.casadi_work = &engine_impl_dae_jac_x_xdot_u_z_work;
    impl_dae_jac_x_xdot_u_z.casadi_sparsity_in = &engine_impl_dae_jac_x_xdot_u_z_sparsity_in;
    impl_dae_jac_x_xdot_u_z.casadi_sparsity_out = &engine_impl_dae_jac_x_xdot_u_z_sparsity_out;
    impl_dae_jac_x_xdot_u_z.casadi_n_in = &engine_impl_dae_jac_x_xdot_u_z_n_in;
    impl_dae_jac_x_xdot_u_z.casadi_n_out = &engine_impl_dae_jac_x_xdot_u_z_n_out;
    external_function_casadi_create(&impl_dae_jac_x_xdot_u_z);

    // least-squares cost residuals (intermediate stages and terminal stage)
    nls_cost_residual.casadi_fun = &engine_ls_cost;
    nls_cost_residual.casadi_work = &engine_ls_cost_work;
    nls_cost_residual.casadi_sparsity_in = &engine_ls_cost_sparsity_in;
    nls_cost_residual.casadi_sparsity_out = &engine_ls_cost_sparsity_out;
    nls_cost_residual.casadi_n_in = &engine_ls_cost_n_in;
    nls_cost_residual.casadi_n_out = &engine_ls_cost_n_out;
    external_function_casadi_create(&nls_cost_residual);

    nls_cost_N_residual.casadi_fun = &engine_ls_cost_N;
    nls_cost_N_residual.casadi_work = &engine_ls_cost_N_work;
    nls_cost_N_residual.casadi_sparsity_in = &engine_ls_cost_N_sparsity_in;
    nls_cost_N_residual.casadi_sparsity_out = &engine_ls_cost_N_sparsity_out;
    nls_cost_N_residual.casadi_n_in = &engine_ls_cost_N_n_in;
    nls_cost_N_residual.casadi_n_out = &engine_ls_cost_N_n_out;
    external_function_casadi_create(&nls_cost_N_residual);

    // dimensions
    ocp_nlp_dims *dims = ocp_nlp_dims_create(config);

    ocp_nlp_dims_set_opt_vars(config, dims, "nx", nx);
    ocp_nlp_dims_set_opt_vars(config, dims, "nu", nu);
    ocp_nlp_dims_set_opt_vars(config, dims, "nz", nz);
    ocp_nlp_dims_set_opt_vars(config, dims, "ns", ns);

    // the dimension arrays hold NUM_STAGES+1 entries, so iterate over exactly those
    for (i = 0; i <= NUM_STAGES; i++)
    {
        ocp_nlp_dims_set_cost(config, dims, i, "ny", &ny[i]);

        ocp_nlp_dims_set_constraints(config, dims, i, "nbx", &nbx[i]);
        ocp_nlp_dims_set_constraints(config, dims, i, "nbu", &nbu[i]);
        ocp_nlp_dims_set_constraints(config, dims, i, "ng", &ng[i]);
        ocp_nlp_dims_set_constraints(config, dims, i, "nh", &nh[i]);
    }

    // in
    ocp_nlp_in *nlp_in = ocp_nlp_in_create(config, dims);
    for (i = 0; i < NUM_STAGES; ++i)
        nlp_in->Ts[i] = T;

    // cost
    ocp_nlp_cost_nls_model **cost = (ocp_nlp_cost_nls_model **) nlp_in->cost;

    for (i = 0; i < NUM_STAGES; ++i) {
        cost[i]->nls_res_jac = (external_function_generic *) &nls_cost_residual;
        cost[i]->nls_hess = NULL;
        blasfeo_pack_dvec(ny[i], y_ref, &cost[i]->y_ref, 0);
        blasfeo_pack_dmat(ny[i], ny[i], W, ny[i], &cost[i]->W, 0, 0);
    }

    // terminal cost: only the first ny[NUM_STAGES] entries of y_ref are used
    cost[NUM_STAGES]->nls_res_jac = (external_function_generic *) &nls_cost_N_residual;
    cost[NUM_STAGES]->nls_hess = NULL;
    blasfeo_pack_dvec(ny[NUM_STAGES], y_ref, &cost[NUM_STAGES]->y_ref, 0);
    blasfeo_pack_dmat(ny[NUM_STAGES], ny[NUM_STAGES], W_N, ny[NUM_STAGES], &cost[NUM_STAGES]->W, 0, 0);

    // dynamics
    for (i = 0; i < NUM_STAGES; ++i)
    {
        if(ocp_nlp_dynamics_model_set(config, dims, nlp_in, i, "impl_ode_fun", &impl_dae_fun)) exit(1);
        if(ocp_nlp_dynamics_model_set(config, dims, nlp_in, i, "impl_ode_fun_jac_x_xdot", &impl_dae_fun_jac_x_xdot_z)) exit(1);
        if(ocp_nlp_dynamics_model_set(config, dims, nlp_in, i, "impl_ode_jac_x_xdot_u", &impl_dae_jac_x_xdot_u_z)) exit(1);
    }

    // bounds
    ocp_nlp_constraints_bgh_model **constraints = (ocp_nlp_constraints_bgh_model **) nlp_in->constraints;

    ocp_nlp_constraints_bounds_set(config, dims, nlp_in, 0, "lb", lb_0);
    ocp_nlp_constraints_bounds_set(config, dims, nlp_in, 0, "ub", ub_0);
    for (i = 0; i < nb[0]; ++i)
        constraints[0]->idxb[i] = idxb[i];

    for (i = 1; i < NUM_STAGES; ++i)
    {
        ocp_nlp_constraints_bounds_set(config, dims, nlp_in, i, "lb", lb);
        ocp_nlp_constraints_bounds_set(config, dims, nlp_in, i, "ub", ub);
        for (j = 0; j < nb[i]; ++j)
            constraints[i]->idxb[j] = idxb[j];
    }

    ocp_nlp_constraints_bounds_set(config, dims, nlp_in, NUM_STAGES, "lb", lb_N);
    ocp_nlp_constraints_bounds_set(config, dims, nlp_in, NUM_STAGES, "ub", ub_N);

    for (i = 0; i < nb[NUM_STAGES]; ++i)
        constraints[NUM_STAGES]->idxb[i] = idxb[i];

    // options
    ocp_nlp_sqp_opts *nlp_opts = ocp_nlp_opts_create(config, dims);
    nlp_opts->maxIter = 1;

    // out
    ocp_nlp_out *nlp_out = ocp_nlp_out_create(config, dims);

    // solver
    ocp_nlp_solver *solver = ocp_nlp_create(config, dims, nlp_opts);

    // initialize
    for (i = 0; i < NUM_STAGES; ++i)
    {
        blasfeo_pack_dvec(nu[i], u, nlp_out->ux+i, 0);
        blasfeo_pack_dvec(nx[i], x0, nlp_out->ux+i, nu[i]);
        // NOTE(review): this re-points nlp_out->z at one static vector on every
        // iteration instead of packing z0 into a per-stage entry - confirm intended.
        nlp_out->z = &z0_dvec;
    }
    blasfeo_pack_dvec(nx[NUM_STAGES], x0, nlp_out->ux+NUM_STAGES, nu[NUM_STAGES]);

    // hand the solver objects over to the S-function's pointer work vector
    ssGetPWork(S)[0] = (void *) dims;
    ssGetPWork(S)[1] = (void *) nlp_in;
    ssGetPWork(S)[2] = (void *) nlp_out;
    ssGetPWork(S)[3] = (void *) nlp_opts;
    ssGetPWork(S)[4] = (void *) solver;
    ssGetPWork(S)[5] = (void *) config;
}
Пример #3
0
/*
 * Test driver: solves one box-constrained MPC problem twice and prints both results.
 *
 *   1. Builds the problem data from mass_spring_system() (nx_ = 8, nu_ = 3, N = 10).
 *      With MHE == 0 the initial state is eliminated: stage 0 has nx[0] = 0 and the
 *      term A*x0 + b is folded into the stage-0 offset.
 *   2. Solves the full-space problem nrep times with d_ip2_res_mpc_hard_libstr and
 *      prints the iteration statistics, the solution and the residuals.
 *   3. Condenses the problem to N2 = 1 stages with d_cond_libstr, solves the condensed
 *      problem, expands the solution back with d_expand_solution_libstr, and prints it.
 *   4. Frees everything that was allocated and prints the average time per run of
 *      both approaches.
 *
 * Always returns 0; the solver status in hpmpc_status is stored but not inspected.
 */
int main()
	{
	
	printf("\n");
	printf("\n");
	printf("\n");
	printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n");
	printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n");
	printf("\n");
	printf(" HPMPC is distributed in the hope that it will be useful,\n");
	printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
	printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");
	printf(" See the GNU Lesser General Public License for more details.\n");
	printf("\n");
	printf("\n");
	printf("\n");
	
#if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3)
	_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!!
#endif

	int ii, jj;
	
	int rep, nrep=1000; // number of timed repetitions

	int nx_ = 8; // number of states (it has to be even for the mass-spring system test problem)
	int nu_ = 3; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)
	int N  = 10; // horizon length
	printf("\nN = %d, nx = %d, nu = %d\n\n", N, nx_, nu_);

#define MHE 0


	// stage-wise variant size
	int nx[N+1];
#if MHE==1
	nx[0] = nx_;
#else
	nx[0] = 0; // initial state is eliminated
#endif
	for(ii=1; ii<=N; ii++)
		nx[ii] = nx_;

	int nu[N+1];
	for(ii=0; ii<N; ii++)
		nu[ii] = nu_;
	nu[N] = 0; // XXX

	// box constraints on all inputs and on the first half of the states
	int nb[N+1];
	nb[0] = nu[0] + nx[0]/2;
	for(ii=1; ii<N; ii++)
		nb[ii] = nu[1] + nx[ii]/2;
	nb[N] = nu[N] + nx[N]/2;

	// no general constraints
	int ng[N+1];
	for(ii=0; ii<N; ii++)
		ng[ii] = 0;
	ng[N] = 0;
	

/************************************************
* IPM common arguments
************************************************/	

	int hpmpc_status;
	int kk = -1;
	int k_max = 10;
	double mu0 = 2.0;
	double mu_tol = 1e-20;
	double alpha_min = 1e-8;
	int warm_start = 0; // read initial guess from x and u
	double *stat; d_zeros(&stat, k_max, 5); // 5 statistics per IPM iteration

/************************************************
* dynamical system
************************************************/	

	double *A; d_zeros(&A, nx_, nx_); // states update matrix

	double *B; d_zeros(&B, nx_, nu_); // inputs matrix

	double *b; d_zeros_align(&b, nx_, 1); // states offset
	double *x0; d_zeros_align(&x0, nx_, 1); // initial state

	double Ts = 0.5; // sampling time
	mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0);
	
	// overwrite the offset and the initial state returned above
	for(jj=0; jj<nx_; jj++)
		b[jj] = 0.1;
	
	for(jj=0; jj<nx_; jj++)
		x0[jj] = 0;
	x0[0] = 2.5;
	x0[1] = 2.5;

#if MHE!=1
	struct blasfeo_dvec sx0;
	blasfeo_allocate_dvec(nx_, &sx0);
	blasfeo_pack_dvec(nx_, x0, &sx0, 0);
	struct blasfeo_dvec sb;
	blasfeo_allocate_dvec(nx_, &sb);
	blasfeo_pack_dvec(nx_, b, &sb, 0);
	struct blasfeo_dmat sA;
	blasfeo_allocate_dmat(nx_, nx_, &sA);
	blasfeo_pack_dmat(nx_, nx_, A, nx_, &sA, 0, 0);
	// b0 = A*x0 + b : stage-0 offset once x0 is eliminated
	struct blasfeo_dvec sb0;
	blasfeo_allocate_dvec(nx_, &sb0);
	blasfeo_dgemv_n(nx_, nx_, 1.0, &sA, 0, 0, &sx0, 0, 1.0, &sb, 0, &sb0, 0);

	struct blasfeo_dmat sBAbt0;
	blasfeo_allocate_dmat(nu[0]+1, nx[1], &sBAbt0);
	blasfeo_pack_tran_dmat(nx_, nu_, B, nx_, &sBAbt0, 0, 0);
	blasfeo_drowin(nx[1], 1.0, &sb0, 0, &sBAbt0, nu[0], 0);
//	d_print_strmat(nu[0]+1, nx[1], &sBAbt0, 0, 0);
#endif

	// [B'; A'; b'] for the intermediate stages
	struct blasfeo_dmat sBAbt1;
	if(N>1)
		{
		blasfeo_allocate_dmat(nu[1]+nx[1]+1, nx[2], &sBAbt1);
		blasfeo_pack_tran_dmat(nx_, nu_, B, nx_, &sBAbt1, 0, 0);
		blasfeo_pack_tran_dmat(nx_, nx_, A, nx_, &sBAbt1, nu[1], 0);
		blasfeo_pack_tran_dmat(nx_, 1, b, nx_, &sBAbt1, nu[1]+nx[1], 0);
//		d_print_strmat(nu[1]+nx[1]+1, nx[2], &sBAbt1, 0, 0);
		}
	
/************************************************
* cost function
************************************************/	

	double *R; d_zeros(&R, nu_, nu_);
	for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0;

	double *S; d_zeros(&S, nu_, nx_);

	double *Q; d_zeros(&Q, nx_, nx_);
	for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0;

	double *r; d_zeros(&r, nu_, 1);
	for(ii=0; ii<nu_; ii++) r[ii] = 0.2;

	double *q; d_zeros(&q, nx_, 1);
	for(ii=0; ii<nx_; ii++) q[ii] = 0.1;

#if MHE!=1
	struct blasfeo_dvec sr;
	blasfeo_allocate_dvec(nu_, &sr);
	blasfeo_pack_dvec(nu_, r, &sr, 0);
	struct blasfeo_dmat sS;
	blasfeo_allocate_dmat(nu_, nx_, &sS);
	blasfeo_pack_dmat(nu_, nx_, S, nu_, &sS, 0, 0);
	// r0 = S*x0 + r : stage-0 gradient once x0 is eliminated
	struct blasfeo_dvec sr0;
	blasfeo_allocate_dvec(nu_, &sr0);
	blasfeo_dgemv_n(nu_, nx_, 1.0, &sS, 0, 0, &sx0, 0, 1.0, &sr, 0, &sr0, 0);

	struct blasfeo_dmat sRSQrq0;
	blasfeo_allocate_dmat(nu[0]+nx[0]+1, nu[0]+nx[0], &sRSQrq0);
	blasfeo_pack_dmat(nu_, nu_, R, nu_, &sRSQrq0, 0, 0);
	blasfeo_drowin(nu[0], 1.0, &sr0, 0, &sRSQrq0, nu[0], 0);
//	d_print_strmat(nu[0]+nx[0]+1, nu[0]+nx[0], &sRSQrq0, 0, 0);

	struct blasfeo_dvec srq0;
	blasfeo_allocate_dvec(nu[0]+nx[0], &srq0);
	blasfeo_dveccp(nu[0], 1.0, &sr0, 0, &srq0, 0);
#endif

	struct blasfeo_dmat sRSQrq1;
	struct blasfeo_dvec srq1;
	if(N>1)
		{
		blasfeo_allocate_dmat(nu[1]+nx[1]+1, nu[1]+nx[1], &sRSQrq1);
		blasfeo_pack_dmat(nu_, nu_, R, nu_, &sRSQrq1, 0, 0);
		blasfeo_pack_tran_dmat(nu_, nx_, S, nu_, &sRSQrq1, nu[1], 0);
		blasfeo_pack_dmat(nx_, nx_, Q, nx_, &sRSQrq1, nu[1], nu[1]);
		blasfeo_pack_tran_dmat(nu_, 1, r, nu_, &sRSQrq1, nu[1]+nx[1], 0);
		blasfeo_pack_tran_dmat(nx_, 1, q, nx_, &sRSQrq1, nu[1]+nx[1], nu[1]);
//		d_print_strmat(nu[1]+nx[1]+1, nu[1]+nx[1], &sRSQrq1, 0, 0);

		blasfeo_allocate_dvec(nu[1]+nx[1], &srq1);
		blasfeo_pack_dvec(nu_, r, &srq1, 0);
		blasfeo_pack_dvec(nx_, q, &srq1, nu[1]);
		}

	struct blasfeo_dmat sRSQrqN;
	blasfeo_allocate_dmat(nx[N]+1, nx[N], &sRSQrqN);
	blasfeo_pack_dmat(nx_, nx_, Q, nx_, &sRSQrqN, 0, 0);
	blasfeo_pack_tran_dmat(nx_, 1, q, nx_, &sRSQrqN, nx[1], 0);
//	d_print_strmat(nu[N]+nx[N]+1, nu[N]+nx[N], &sRSQrqN, 0, 0);

	struct blasfeo_dvec srqN;
	blasfeo_allocate_dvec(nx[N], &srqN);
	blasfeo_pack_dvec(nx_, q, &srqN, 0);

/************************************************
* constraints
************************************************/	

	// layout of each d vector: [lower bounds (nb+ng) ; upper bounds (nb+ng)]
#if MHE!=1
	double *d0; d_zeros(&d0, 2*nb[0]+2*ng[0], 1);
	int *idxb0; int_zeros(&idxb0, nb[0], 1);
	// inputs
	for(ii=0; ii<nu[0]; ii++)
		{
		d0[ii]             = - 0.5; // u_min
		d0[nb[0]+ng[0]+ii] = + 0.5; // u_max
		idxb0[ii] = ii;
		}
	// states
	for( ; ii<nb[0]; ii++)
		{
		d0[ii]             = - 4.0; // x_min
		d0[nb[0]+ng[0]+ii] = + 4.0; // x_max
		idxb0[ii] = ii;
		}
#endif

	double *d1; 
	int *idxb1; 
	if(N>1)
		{
		d_zeros(&d1, 2*nb[1]+2*ng[1], 1);
		int_zeros(&idxb1, nb[1], 1);
		// inputs
		for(ii=0; ii<nu[1]; ii++)
			{
			d1[ii]             = - 0.5; // u_min
			d1[nb[1]+ng[1]+ii] = + 0.5; // u_max
			idxb1[ii] = ii;
			}
		// states
		for( ; ii<nb[1]; ii++)
			{
			d1[ii]             = - 4.0; // x_min
			d1[nb[1]+ng[1]+ii] = + 4.0; // x_max
			idxb1[ii] = ii;
			}
		}

	double *dN; d_zeros(&dN, 2*nb[N]+2*ng[N], 1);
	int *idxbN; int_zeros(&idxbN, nb[N], 1);
	// no inputs
	// states
	for(ii=0 ; ii<nb[N]; ii++)
		{
		dN[ii]             = - 4.0; // x_min
		dN[nb[N]+ng[N]+ii] = + 4.0; // x_max
		idxbN[ii] = ii;
		}

	struct blasfeo_dvec sd0;
	blasfeo_allocate_dvec(2*nb[0]+2*ng[0], &sd0);
	blasfeo_pack_dvec(2*nb[0]+2*ng[0], d0, &sd0, 0);
//	blasfeo_print_tran_dvec(2*nb[0], &sd0, 0);

	struct blasfeo_dvec sd1;
	blasfeo_allocate_dvec(2*nb[1]+2*ng[1], &sd1);
	blasfeo_pack_dvec(2*nb[1]+2*ng[1], d1, &sd1, 0);
//	blasfeo_print_tran_dvec(2*nb[1], &sd1, 0);

	struct blasfeo_dvec sdN;
	blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &sdN);
	blasfeo_pack_dvec(2*nb[N]+2*ng[N], dN, &sdN, 0);
//	blasfeo_print_tran_dvec(2*nb[N], &sdN, 0);

/************************************************
* array of data matrices
************************************************/	

	// original MPC: the per-stage entries are shallow copies sharing the buffers above
	struct blasfeo_dmat hsBAbt[N];
	struct blasfeo_dvec hsb[N];
	struct blasfeo_dmat hsRSQrq[N+1];
	struct blasfeo_dvec hsrq[N+1];
	struct blasfeo_dmat hsDCt[N+1]; // XXX never initialized; ng is zero at every stage
	struct blasfeo_dvec hsd[N+1];
	int *hidxb[N+1];

	ii = 0;
#if MHE!=1
	hsBAbt[ii] = sBAbt0;
	hsb[ii] = sb0;
	hsRSQrq[ii] = sRSQrq0;
	hsrq[ii] = srq0;
	hsd[ii] = sd0;
	hidxb[0] = idxb0;
#else
	hsBAbt[ii] = sBAbt1;
	hsb[ii] = sb;
	hsRSQrq[ii] = sRSQrq1;
	hsrq[ii] = srq1;
	hsd[ii] = sd1;
	hidxb[0] = idxb1;
#endif

	for(ii=1; ii<N; ii++)
		{
		hsBAbt[ii] = sBAbt1;
		hsb[ii] = sb;
		hsRSQrq[ii] = sRSQrq1;
		hsrq[ii] = srq1;
		hsd[ii] = sd1;
		hidxb[ii] = idxb1;
		}
	// here ii == N
	hsRSQrq[ii] = sRSQrqN;
	hsrq[ii] = srqN;
	hsd[ii] = sdN;
	hidxb[N] = idxbN;

/************************************************
* solve full space system using Riccati / IPM
************************************************/	

	// result vectors
	struct blasfeo_dvec hsux[N+1];
	struct blasfeo_dvec hspi[N+1];
	struct blasfeo_dvec hslam[N+1];
	struct blasfeo_dvec hst[N+1];
	for(ii=0; ii<=N; ii++)
		{
		blasfeo_allocate_dvec(nu[ii]+nx[ii], &hsux[ii]);
		blasfeo_allocate_dvec(nx[ii], &hspi[ii]);
		blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hslam[ii]);
		blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hst[ii]);
		}

	// work space
	void *work_space_ipm;
	v_zeros_align(&work_space_ipm, d_ip2_res_mpc_hard_work_space_size_bytes_libstr(N, nx, nu, nb, ng));

	struct timeval tv0, tv1;

	printf("\nsolving... (full space system)\n");

	gettimeofday(&tv0, NULL); // start

	for(rep=0; rep<nrep; rep++)
		{
		hpmpc_status = d_ip2_res_mpc_hard_libstr(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, hsux, 1, hspi, hslam, hst, work_space_ipm);
		}

	gettimeofday(&tv1, NULL); // stop

	printf("\n... done\n");

	// average wall-clock time per run, in seconds
	float time_ipm_full = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);

	printf("\nstatistics from last run\n\n");
	for(jj=0; jj<kk; jj++)
		printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]);
	printf("\n");
	
	printf("\nux =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(nu[ii]+nx[ii], &hsux[ii], 0);

	printf("\npi =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(nx[ii], &hspi[ii], 0);

	printf("\nlam =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hslam[ii], 0);

	printf("\nt =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hst[ii], 0);

	// residuals vectors
	struct blasfeo_dvec hsrrq[N+1];
	struct blasfeo_dvec hsrb[N+1];
	struct blasfeo_dvec hsrd[N+1];
	struct blasfeo_dvec hsrm[N+1];
	double mu;

	for(ii=0; ii<N; ii++)
		{
		blasfeo_allocate_dvec(nu[ii]+nx[ii], &hsrrq[ii]);
		blasfeo_allocate_dvec(nx[ii+1], &hsrb[ii]);
		blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hsrd[ii]);
		blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hsrm[ii]);
		}
	// the last stage has no dynamics, hence no hsrb[N]
	blasfeo_allocate_dvec(nu[N]+nx[N], &hsrrq[N]);
	blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &hsrd[N]);
	blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &hsrm[N]);

	void *work_space_res;
	v_zeros_align(&work_space_res, d_res_res_mpc_hard_work_space_size_bytes_libstr(N, nx, nu, nb, ng));

	d_res_res_mpc_hard_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsb, hsRSQrq, hsrq, hsux, hsDCt, hsd, hspi, hslam, hst, hsrrq, hsrb, hsrd, hsrm, &mu, work_space_res);

	printf("\nres_rq\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_exp_tran_dvec(nu[ii]+nx[ii], &hsrrq[ii], 0);

	printf("\nres_b\n");
	for(ii=0; ii<N; ii++)
		blasfeo_print_exp_tran_dvec(nx[ii+1], &hsrb[ii], 0);

	printf("\nres_d\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_exp_tran_dvec(2*nb[ii]+2*ng[ii], &hsrd[ii], 0);

	printf("\nres_m\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_exp_tran_dvec(2*nb[ii]+2*ng[ii], &hsrm[ii], 0);

/************************************************
* full condensing
************************************************/	

	// condensed problem size
	int N2 = 1;

	int nx2[N2+1];
	int nu2[N2+1];
	int nb2[N2+1];
	int ng2[N2+1];

	d_cond_compute_problem_size_libstr(N, nx, nu, nb, hidxb, ng, nx2, nu2, nb2, ng2);
	
#if 0
	for(ii=0; ii<=N2; ii++)
		printf("\n%d %d %d %d\n", nx2[ii], nu2[ii], nb2[ii], ng2[ii]);
#endif

	int work_sizes_cond[5];
	int work_size_cond = d_cond_work_space_size_bytes_libstr(N, nx, nu, nb, hidxb, ng, nx2, nu2, nb2, ng2, work_sizes_cond);
	// the condensed dimensions (nx2, not nx) go in the second group of arguments
	int memo_size_cond = d_cond_memory_space_size_bytes_libstr(N, nx, nu, nb, hidxb, ng, nx2, nu2, nb2, ng2);
	int work_size_ipm_cond = d_ip2_res_mpc_hard_work_space_size_bytes_libstr(N2, nx2, nu2, nb2, ng2);
	int work_sizes_expa[2];
	int work_size_expa = d_expand_work_space_size_bytes_libstr(N, nx, nu, nb, ng, work_sizes_expa);

	// work space
	void *work_cond;
	void *memo_cond;
	void *work_ipm_cond;
	void *work_expa;

	v_zeros_align(&work_cond, work_size_cond);
	v_zeros_align(&memo_cond, memo_size_cond);
	v_zeros_align(&work_ipm_cond, work_size_ipm_cond);
	v_zeros_align(&work_expa, work_size_expa);

	// data matrices
	struct blasfeo_dmat hsBAbt2[N2];
	struct blasfeo_dvec hsb2[N2];
	struct blasfeo_dmat hsRSQrq2[N2+1];
	struct blasfeo_dvec hsrq2[N2+1];
	struct blasfeo_dmat hsDCt2[N2+1];
	struct blasfeo_dvec hsd2[N2+1];
	int *hidxb2[N2+1];

	for(ii=0; ii<N2; ii++)
		blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, nx2[ii+1], &hsBAbt2[ii]);
	
	for(ii=0; ii<N2; ii++)
		blasfeo_allocate_dvec(nx2[ii+1], &hsb2[ii]);
	
	for(ii=0; ii<=N2; ii++)
		blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, nu2[ii]+nx2[ii], &hsRSQrq2[ii]);
	
	for(ii=0; ii<=N2; ii++)
		blasfeo_allocate_dvec(nu2[ii]+nx2[ii], &hsrq2[ii]);
	
	for(ii=0; ii<=N2; ii++)
		blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, ng2[ii], &hsDCt2[ii]);
	
	for(ii=0; ii<=N2; ii++)
		blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hsd2[ii]);
	
	for(ii=0; ii<=N2; ii++)
		int_zeros(&hidxb2[ii], nb2[ii], 1);
	
	// result vectors
	struct blasfeo_dvec hsux2[N2+1];
	struct blasfeo_dvec hspi2[N2+1];
	struct blasfeo_dvec hslam2[N2+1];
	struct blasfeo_dvec hst2[N2+1];
	for(ii=0; ii<=N2; ii++)
		{
		blasfeo_allocate_dvec(nu2[ii]+nx2[ii], &hsux2[ii]);
		blasfeo_allocate_dvec(nx2[ii], &hspi2[ii]);
		blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hslam2[ii]);
		blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hst2[ii]);
		}

	d_cond_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, memo_cond, work_cond, work_sizes_cond);

#if 0
	printf("\nBAbt2\n");
	for(ii=0; ii<N2; ii++)
		d_print_strmat(nu2[ii]+nx2[ii]+1, nx2[ii+1], &hsBAbt2[ii], 0, 0);
	printf("\nRSQrq2\n");
	for(ii=0; ii<=N2; ii++)
		d_print_strmat(nu2[ii]+nx2[ii]+1, nu2[ii]+nx2[ii], &hsRSQrq2[ii], 0, 0);
	printf("\nDCt2\n");
	for(ii=0; ii<=N2; ii++)
		d_print_strmat(nu2[ii]+nx2[ii], ng2[ii], &hsDCt2[ii], 0, 0);
	printf("\nd2\n");
	for(ii=0; ii<=N2; ii++)
		blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hsd2[ii], 0);
#endif


/************************************************
* solve condensed system using IPM
************************************************/	
	
	// zero solution, so the printout below can only come from the condensed solve
	for(ii=0; ii<=N; ii++)
		blasfeo_dvecse(nu[ii]+nx[ii], 0.0, &hsux[ii], 0);
	for(ii=0; ii<=N; ii++)
		blasfeo_dvecse(nx[ii], 0.0, &hspi[ii], 0);
	for(ii=0; ii<=N; ii++)
		blasfeo_dvecse(2*nb[ii]+2*ng[ii], 0.0, &hslam[ii], 0);
	for(ii=0; ii<=N; ii++)
		blasfeo_dvecse(2*nb[ii]+2*ng[ii], 0.0, &hst[ii], 0);

	printf("\nsolving... (condensed system)\n");

	gettimeofday(&tv0, NULL); // start

	// the timed loop covers condensing, solving and expanding
	for(rep=0; rep<nrep; rep++)
		{

		d_cond_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, memo_cond, work_cond, work_sizes_cond);

		hpmpc_status = d_ip2_res_mpc_hard_libstr(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N2, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, hsux2, 1, hspi2, hslam2, hst2, work_ipm_cond);

		d_expand_solution_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsb, hsRSQrq, hsrq, hsDCt, hsux, hspi, hslam, hst, nx2, nu2, nb2, hidxb2, ng2, hsux2, hspi2, hslam2, hst2, work_expa, work_sizes_expa);

		}

	gettimeofday(&tv1, NULL); // stop

	printf("\n... done\n");

	float time_ipm_cond = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6);

	printf("\nstatistics from last run\n\n");
	for(jj=0; jj<kk; jj++)
		printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]);
	printf("\n");
	
#if 0
	printf("\nux2 =\n\n");
	for(ii=0; ii<=N2; ii++)
		blasfeo_print_tran_dvec(nu2[ii]+nx2[ii], &hsux2[ii], 0);

	printf("\npi2 =\n\n");
	for(ii=0; ii<=N2; ii++)
		blasfeo_print_tran_dvec(nx2[ii], &hspi2[ii], 0);

	printf("\nlam2 =\n\n");
	for(ii=0; ii<=N2; ii++)
		blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hslam2[ii], 0);

	printf("\nt2 =\n\n");
	for(ii=0; ii<=N2; ii++)
		blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hst2[ii], 0);
#endif

	printf("\nux =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(nu[ii]+nx[ii], &hsux[ii], 0);

	printf("\npi =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(nx[ii], &hspi[ii], 0);

	printf("\nlam =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hslam[ii], 0);

	printf("\nt =\n\n");
	for(ii=0; ii<=N; ii++)
		blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hst[ii], 0);

/************************************************
* free memory full space
************************************************/	

	d_free(A);
	d_free(B);
	d_free(b);
	d_free(x0);
	d_free(R);
	d_free(S);
	d_free(Q);
	d_free(r);
	d_free(q);
	d_free(d0);
	int_free(idxb0);
	d_free(d1);
	int_free(idxb1);
	d_free(dN);
	int_free(idxbN);
	d_free(stat);

	v_free_align(work_space_ipm);

	// the hs* data arrays hold shallow copies of these, so only the originals are freed
	blasfeo_free_dvec(&sx0);
	blasfeo_free_dvec(&sb);
	blasfeo_free_dmat(&sA);
	blasfeo_free_dvec(&sb0);
	blasfeo_free_dmat(&sBAbt0);
	if(N>1)
		blasfeo_free_dmat(&sBAbt1);
	blasfeo_free_dvec(&sr);
	blasfeo_free_dmat(&sS);
	blasfeo_free_dvec(&sr0);
	blasfeo_free_dmat(&sRSQrq0);
	blasfeo_free_dvec(&srq0);
	if(N>1)
		blasfeo_free_dmat(&sRSQrq1);
	if(N>1)
		blasfeo_free_dvec(&srq1);
	blasfeo_free_dmat(&sRSQrqN);
	blasfeo_free_dvec(&srqN);
	blasfeo_free_dvec(&sd0);
	blasfeo_free_dvec(&sd1);
	blasfeo_free_dvec(&sdN);
	for(ii=0; ii<N; ii++)
		{
		blasfeo_free_dvec(&hsux[ii]);
		blasfeo_free_dvec(&hspi[ii]);
		blasfeo_free_dvec(&hslam[ii]);
		blasfeo_free_dvec(&hst[ii]);
		blasfeo_free_dvec(&hsrrq[ii]);
		blasfeo_free_dvec(&hsrb[ii]);
		blasfeo_free_dvec(&hsrd[ii]);
		blasfeo_free_dvec(&hsrm[ii]);
		}
	ii = N;
	blasfeo_free_dvec(&hsux[ii]);
	blasfeo_free_dvec(&hspi[ii]);
	blasfeo_free_dvec(&hslam[ii]);
	blasfeo_free_dvec(&hst[ii]);
	blasfeo_free_dvec(&hsrrq[ii]);
	blasfeo_free_dvec(&hsrd[ii]);
	blasfeo_free_dvec(&hsrm[ii]);

	v_free_align(work_space_res);

/************************************************
* free memory condensed problem
************************************************/	

	for(ii=0; ii<N2; ii++)
		{
		blasfeo_free_dmat(&hsBAbt2[ii]);
		blasfeo_free_dvec(&hsb2[ii]);
		}
	for(ii=0; ii<=N2; ii++)
		{
		blasfeo_free_dmat(&hsRSQrq2[ii]);
		blasfeo_free_dvec(&hsrq2[ii]);
		blasfeo_free_dmat(&hsDCt2[ii]);
		blasfeo_free_dvec(&hsd2[ii]);
		int_free(hidxb2[ii]);
		blasfeo_free_dvec(&hsux2[ii]);
		blasfeo_free_dvec(&hspi2[ii]);
		blasfeo_free_dvec(&hslam2[ii]);
		blasfeo_free_dvec(&hst2[ii]);
		}

	v_free_align(work_cond);
	v_free_align(memo_cond);
	v_free_align(work_ipm_cond);
	v_free_align(work_expa);

/************************************************
* print timings
************************************************/	

	printf("\ntime ipm full (in sec): %e", time_ipm_full);
	printf("\ntime ipm cond (in sec): %e\n\n", time_ipm_cond);

/************************************************
* return
************************************************/	

	return 0;

	}
Пример #4
0
/*
 * Lifted implicit Runge-Kutta integrator step.
 *
 * Integrates in->x over the horizon in->T using opts->num_steps steps of an
 * ns-stage IRK scheme given by the Butcher tableau (opts->A_mat, opts->b_vec).
 * The stage variables K and the sensitivities JKf / JGK / JGf / S_forw are kept
 * in `mem_` between calls; a single Newton-type update of K is performed per
 * step (see the "not used; always 1 in lifted" note below).
 *
 * Parameters:
 *   config_ : sim_config, only forwarded to sim_lifted_irk_cast_workspace
 *   in      : inputs (x, u, S_forw, T, dims, model)
 *   out     : outputs; out->xn, out->S_forw and out->info timings are written
 *   opts_   : sim_opts (ns, tableau, num_steps, sens_* flags, output_z)
 *   mem_    : sim_lifted_irk_memory, read and updated
 *   work_   : raw workspace, cast via sim_lifted_irk_cast_workspace
 *
 * Returns 0 on completion, ACADOS_FAILURE if the tableau size does not match ns
 * or a DAE-related feature (nz != 0, output_z, sens_algebraic) is requested.
 * Calls exit(1) if adjoint sensitivities (opts->sens_adj) are requested.
 */
int sim_lifted_irk(void *config_, sim_in *in, sim_out *out, void *opts_, void *mem_,
                       void *work_)
{
    // typecasting
    sim_config *config = config_;
    sim_opts *opts = opts_;
    sim_lifted_irk_memory *memory = (sim_lifted_irk_memory *) mem_;

    void *dims_ = in->dims;
    sim_lifted_irk_dims *dims = (sim_lifted_irk_dims *) dims_;

    sim_lifted_irk_workspace *workspace =
        (sim_lifted_irk_workspace *) sim_lifted_irk_cast_workspace(config, dims, opts,
                                                                           work_);

    int nx = dims->nx;
    int nu = dims->nu;
    int nz = dims->nz;

    int ns = opts->ns;

    if ( opts->ns != opts->tableau_size )
    {
        printf("Error in sim_lifted_irk: the Butcher tableau size does not match ns");
        return ACADOS_FAILURE;
    }
    // assert - only use supported features
    if (nz != 0)
    {
        printf("nz should be zero - DAEs are not supported by the lifted IRK integrator");
        return ACADOS_FAILURE;
    }
    if (opts->output_z)
    {
        printf("opts->output_z should be false - DAEs are not supported for the lifted IRK integrator");
        return ACADOS_FAILURE;
    }
    if (opts->sens_algebraic)
    {
        printf("opts->sens_algebraic should be false - DAEs are not supported for the lifted IRK integrator");
        return ACADOS_FAILURE;
    }

    int ii, jj, ss;
    double a;


    double *x = in->x;
    double *u = in->u;
    double *S_forw_in = in->S_forw;

    // int newton_iter = opts->newton_iter; // not used; always 1 in lifted

    double *A_mat = opts->A_mat;
    double *b_vec = opts->b_vec;
    int num_steps = opts->num_steps;

    // length of one integration step
    double step = in->T / num_steps;
    // TODO(FreyJo): this should be an option!
    int update_sens = memory->update_sens;

    int *ipiv = workspace->ipiv;
    struct blasfeo_dmat *JGK = memory->JGK;
    struct blasfeo_dmat *S_forw = memory->S_forw;

    // scratch Jacobians of the implicit ODE w.r.t. x, xdot and u
    struct blasfeo_dmat *J_temp_x = workspace->J_temp_x;
    struct blasfeo_dmat *J_temp_xdot = workspace->J_temp_xdot;
    struct blasfeo_dmat *J_temp_u = workspace->J_temp_u;

    struct blasfeo_dvec *rG = workspace->rG;
    struct blasfeo_dvec *K = memory->K;
    struct blasfeo_dmat *JGf = memory->JGf;
    struct blasfeo_dmat *JKf = memory->JKf;
    struct blasfeo_dvec *xt = workspace->xt;
    struct blasfeo_dvec *xn = workspace->xn;
    struct blasfeo_dvec *xn_out = workspace->xn_out;
    struct blasfeo_dvec *dxn = workspace->dxn;

    struct blasfeo_dvec *w = workspace->w;

    double *x_out = out->xn;
    double *S_forw_out = out->S_forw;

    struct blasfeo_dvec_args ext_fun_in_K;

    ext_fun_arg_t ext_fun_type_in[3];
    void *ext_fun_in[3];

    struct blasfeo_dvec_args ext_fun_out_rG;
    ext_fun_arg_t ext_fun_type_out[5];
    void *ext_fun_out[5];

    lifted_irk_model *model = in->model;

    acados_timer timer, timer_ad;
    double timing_ad = 0.0;

    if (opts->sens_adj)
    {
        printf("LIFTED_IRK with ADJOINT SENSITIVITIES - NOT IMPLEMENTED YET - EXITING.");
        exit(1);
    }


    // clear the scratch Jacobians; each one is zeroed with its own dimensions
    // (J_temp_u is nx x nu, so it must not be cleared through J_temp_x)
    blasfeo_dgese(nx, nx, 0.0, J_temp_x, 0, 0);
    blasfeo_dgese(nx, nx, 0.0, J_temp_xdot, 0, 0);
    blasfeo_dgese(nx, nu, 0.0, J_temp_u, 0, 0);

    // TODO(dimitris): shouldn't this be NF instead of nx+nu??
    if (update_sens) blasfeo_pack_dmat(nx, nx + nu, S_forw_in, nx, S_forw, 0, 0);

    // residual starts at zero; xn and xn_out both start from the input state
    blasfeo_dvecse(nx * ns, 0.0, rG, 0);
    blasfeo_pack_dvec(nx, x, xn, 0);
    blasfeo_pack_dvec(nx, x, xn_out, 0);
    blasfeo_dvecse(nx, 0.0, dxn, 0);


    // start the loop
    acados_tic(&timer);
    for (ss = 0; ss < num_steps; ss++)
    {
        // initialize
        blasfeo_dgese(nx * ns, nx * ns, 0.0, JGK, 0, 0);
        blasfeo_dgese(nx * ns, nx + nu, 0.0, JGf, 0, 0);

        // expansion step (K variables)
        // compute x and u step: w = [in->x; in->u] - [memory->x; memory->u]
        blasfeo_pack_dvec(nx, in->x, w, 0);
        blasfeo_pack_dvec(nu, in->u, w, nx);

        blasfeo_daxpy(nx, -1.0, memory->x, 0, w, 0, w, 0);
        blasfeo_daxpy(nu, -1.0, memory->u, 0, w, nx, w, nx);
        // K[ss] += JKf[ss] * w
        blasfeo_dgemv_n(nx * ns, nx + nu, 1.0, &JKf[ss], 0, 0, w, 0, 1.0, &K[ss], 0, &K[ss], 0);

        // remember the current inputs for the next call
        blasfeo_pack_dvec(nx, in->x, memory->x, 0);
        blasfeo_pack_dvec(nu, in->u, memory->u, 0);

        // reset value of JKf
        blasfeo_dgese(nx * ns, nx + nu, 0.0, &JKf[ss], 0, 0);

        for (ii = 0; ii < ns; ii++)  // ii-th row of tableau
        {
            // take x(n); copy a strvec into a strvec
            blasfeo_dveccp(nx, xn, 0, xt, 0);

            for (jj = 0; jj < ns; jj++)
            {  // jj-th col of tableau
                a = A_mat[ii + ns * jj];
                if (a != 0)
                {
                    // xt = xt + T_int * a[i,j]*K_j
                    a *= step;
                    blasfeo_daxpy(nx, a, &K[ss], jj * nx, xt, 0, xt, 0);
                }
            }

            if (!update_sens)
            {
                // compute the residual of implicit ode at time t_ii, store value in rGt
                acados_tic(&timer_ad);

                ext_fun_type_in[0] = BLASFEO_DVEC;
                ext_fun_in[0] = xt;  // x: nx
                ext_fun_type_in[1] = BLASFEO_DVEC_ARGS;
                ext_fun_in_K.xi = ii * nx;
                ext_fun_in_K.x = &K[ss];
                ext_fun_in[1] = &ext_fun_in_K;  // K[ii*nx]: nx
                ext_fun_type_in[2] = COLMAJ;
                ext_fun_in[2] = u;  // u: nu

                ext_fun_type_out[0] = BLASFEO_DVEC_ARGS;
                ext_fun_out_rG.x = rG;
                ext_fun_out_rG.xi = ii * nx;
                ext_fun_out[0] = &ext_fun_out_rG;  // fun: nx

                model->impl_ode_fun->evaluate(model->impl_ode_fun, ext_fun_type_in, ext_fun_in,
                                              ext_fun_type_out, ext_fun_out);

                timing_ad += acados_toc(&timer_ad);
            }
            else
            {
                // compute the jacobian of implicit ode
                acados_tic(&timer_ad);

                ext_fun_type_in[0] = BLASFEO_DVEC;
                ext_fun_in[0] = xt;  // x: nx
                ext_fun_type_in[1] = BLASFEO_DVEC_ARGS;
                ext_fun_in_K.xi = ii * nx;  // K[ii*nx]: nx
                ext_fun_in_K.x = &K[ss];
                ext_fun_in[1] = &ext_fun_in_K;
                ext_fun_type_in[2] = COLMAJ;
                ext_fun_in[2] = u;  // u: nu

                ext_fun_type_out[0] = BLASFEO_DVEC_ARGS;
                ext_fun_out_rG.x = rG;
                ext_fun_out_rG.xi = ii * nx;
                ext_fun_out[0] = &ext_fun_out_rG;  // fun: nx
                ext_fun_type_out[1] = BLASFEO_DMAT;
                ext_fun_out[1] = J_temp_x;  // jac_x: nx*nx
                ext_fun_type_out[2] = BLASFEO_DMAT;
                ext_fun_out[2] = J_temp_xdot;  // jac_xdot: nx*nx
                ext_fun_type_out[3] = BLASFEO_DMAT;
                ext_fun_out[3] = J_temp_u;  // jac_u: nx*nu

                model->impl_ode_fun_jac_x_xdot_u->evaluate(model->impl_ode_fun_jac_x_xdot_u,
                                                           ext_fun_type_in, ext_fun_in,
                                                           ext_fun_type_out, ext_fun_out);

                timing_ad += acados_toc(&timer_ad);

                // ii-th block row of JGf = [jac_x, jac_u]
                blasfeo_dgecp(nx, nx, J_temp_x, 0, 0, JGf, ii * nx, 0);
                blasfeo_dgecp(nx, nu, J_temp_u, 0, 0, JGf, ii * nx, nx);

                for (jj = 0; jj < ns; jj++)
                {
                    // compute the block (ii,jj)th block of JGK
                    a = A_mat[ii + ns * jj];
                    if (a != 0)
                    {
                        a *= step;
                        blasfeo_dgead(nx, nx, a, J_temp_x, 0, 0, JGK, ii * nx, jj * nx);
                    }
                    if (jj == ii)
                    {
                        blasfeo_dgead(nx, nx, 1, J_temp_xdot, 0, 0, JGK, ii * nx, jj * nx);
                    }
                }  // end jj
            }
        }  // end ii

        // obtain x(n+1) before updating K(n)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, step * b_vec[ii], &K[ss], ii * nx, xn, 0, xn, 0);

        // DGETRF computes an LU factorization of a general M-by-N matrix A
        // using partial pivoting with row interchanges.

        // NOTE(review): when update_sens is false, JGK was zeroed at the top of this
        // step and is neither refilled nor factorized, and ipiv is not written here,
        // yet both are used by the solves below - confirm this path is supported.
        if (update_sens)
        {
            blasfeo_dgetrf_rp(nx * ns, nx * ns, JGK, 0, 0, JGK, 0, 0, ipiv);
        }

        // update r.h.s (6.23, Quirynen2017): rG += JGf(:, 0:nx) * dxn
        blasfeo_dgemv_n(nx * ns, nx, 1.0, JGf, 0, 0, dxn, 0, 1.0, rG, 0, rG, 0);


        // permute also the r.h.s
        blasfeo_dvecpe(nx * ns, ipiv, rG, 0);

        // solve JGK * y = rG, JGK on the (l)eft, (l)ower-trian, (n)o-trans
        //                    (u)nit trian
        blasfeo_dtrsv_lnu(nx * ns, JGK, 0, 0, rG, 0, rG, 0);

        // solve JGK * x = rG, JGK on the (l)eft, (u)pper-trian, (n)o-trans
        //                    (n)o unit trian , and store x in rG
        blasfeo_dtrsv_unn(nx * ns, JGK, 0, 0, rG, 0, rG, 0);


        // K = K - rG, where rG is DeltaK
        blasfeo_daxpy(nx * ns, -1.0, rG, 0, &K[ss], 0, &K[ss], 0);

        // obtain dx(n)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, -step * b_vec[ii], rG, ii * nx, dxn, 0, dxn, 0);

        // update JKf: JKf[ss] = JGf(:, 0:nx) * S_forw
        blasfeo_dgemm_nn(nx * ns, nx + nu, nx, 1.0, JGf, 0, 0, S_forw, 0, 0, 0.0, &JKf[ss], 0, 0,
                         &JKf[ss], 0, 0);

        // add the control part: JKf[ss](:, nx:nx+nu) += JGf(:, nx:nx+nu)
        blasfeo_dgead(nx * ns, nu, 1.0, JGf, 0, nx, &JKf[ss], 0, nx);

        // apply the row permutation and both triangular solves with the factors in JGK
        blasfeo_drowpe(nx * ns, ipiv, &JKf[ss]);
        blasfeo_dtrsm_llnu(nx * ns, nx + nu, 1.0, JGK, 0, 0, &JKf[ss], 0, 0, &JKf[ss], 0, 0);
        blasfeo_dtrsm_lunn(nx * ns, nx + nu, 1.0, JGK, 0, 0, &JKf[ss], 0, 0, &JKf[ss], 0, 0);

        // update forward sensitivity
        for (jj = 0; jj < ns; jj++)
            blasfeo_dgead(nx, nx + nu, -step * b_vec[jj], &JKf[ss], jj * nx, 0, S_forw, 0, 0);

        // obtain x(n+1)
        for (ii = 0; ii < ns; ii++)
            blasfeo_daxpy(nx, step * b_vec[ii], &K[ss], ii * nx, xn_out, 0, xn_out, 0);

    }  // end int step ss


    // extract output
    blasfeo_unpack_dvec(nx, xn_out, 0, x_out);

    blasfeo_unpack_dmat(nx, nx + nu, S_forw, 0, 0, S_forw_out, nx);

    out->info->CPUtime = acados_toc(&timer);
    out->info->LAtime = 0.0;
    out->info->ADtime = timing_ad;

    return 0;
}