/* primal-dual interior-point method, hard constraints, time variant matrices (mpc version) ; version with A diagonal and nu & nx time-variant*/ int d_ip2_diag_mpc(int *kk, int k_max, double mu0, double mu_tol, double alpha_min, int warm_start, double *sigma_par, double *stat, int N, int *nx, int *nu, int *nb, int **idxb, double **dA, double **pBt, double **pR, double **pSt, double **pQ, double **b, double **d, double **rq, double **ux, int compute_mult, double **pi, double **lam, double **t, double *work_memory) { // indeces int jj, ll, ii, bs0, idx; // constants const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = D_MR*D_NCL; // number of doubles per cache line // const int nz = nx+nu+1; // const int nxu = nx+nu; // const int pnz = bs*((nz+bs-1)/bs); // const int pnx = bs*((nx+bs-1)/bs); // const int pnb = bs*((nb+bs-1)/bs); // simd aligned number of two-sided box constraints !!!!!!!!!!!!!!!!!! // const int cnz = ncl*((nz+ncl-1)/ncl); // const int cnx = ncl*((nx+ncl-1)/ncl); // const int anz = nal*((nz+nal-1)/nal); // const int anx = nal*((nx+nal-1)/nal); // const int anb = nal*((2*nb+nal-1)/nal); // cache aligned number of box constraints //const int anb = nal*((nb+nal-1)/nal); // cache aligned number of two-sided box constraints !!!!!!!!!!!!!!!!!! // const int pad = (ncl-nx%ncl)%ncl; // packing between BAbtL & P //const int cnl = cnz<cnx+ncl ? nx+pad+cnx+ncl : nx+pad+cnz; // const int cnl = cnz<cnx+ncl ? cnx+ncl : cnz; //printf("\n%d %d %d %d %d\n", N, nx, nu, nb, ng); //d_print_pmat(nz, nx, bs, pBAbt[0], cnx); //d_print_pmat(nz, nx, bs, pBAbt[1], cnx); //d_print_pmat(nz, nx, bs, pBAbt[N-1], cnx); //d_print_pmat(nz, nz, bs, pQ[0], cnz); //d_print_pmat(nz, nz, bs, pQ[1], cnz); //d_print_pmat(nz, nz, bs, pQ[N], cnz); //d_print_pmat(nx+nu, ng, bs, pDCt[0], cng); //d_print_pmat(nx+nu, ng, bs, pDCt[1], cng); //d_print_pmat(nx+nu, ng, bs, pDCt[N], cng); //d_print_mat(1, 2*pnb+2*png, d[0], 1); //d_print_mat(1, 2*pnb+2*png, d[1], 1); //d_print_mat(1, 2*pnb+2*png, d[N], 1); //d_print_mat(1, nx+nu, ux[0], 1); //d_print_mat(1, nx+nu, ux[1], 1); //d_print_mat(1, nx+nu, ux[N], 1); //exit(1); double *ptr; ptr = work_memory; int *ptr_int, *anu, *anx, *pnu, *pnx, *pnb, *cnu, *cnx; ptr_int = (int *) ptr; anu = ptr_int; ptr_int += (N+1); anx = ptr_int; ptr_int += (N+1); pnu = ptr_int; ptr_int += (N+1); pnx = ptr_int; ptr_int += (N+1); pnb = ptr_int; ptr_int += (N+1); cnu = ptr_int; ptr_int += (N+1); cnx = ptr_int; ptr_int += (N+1); for(jj=0; jj<=N; jj++) { anu[jj] = (nu[jj]+nal-1)/nal*nal; anx[jj] = (nx[jj]+nal-1)/nal*nal; pnu[jj] = (nu[jj]+bs-1)/bs*bs; pnx[jj] = (nx[jj]+bs-1)/bs*bs; pnb[jj] = (nb[jj]+bs-1)/bs*bs; cnu[jj] = (nu[jj]+ncl-1)/ncl*ncl; cnx[jj] = (nx[jj]+ncl-1)/ncl*ncl; } int pnxM = 0; for(jj=0; jj<=N; jj++) pnxM = pnx[jj]>pnxM ? pnx[jj] : pnxM; int pnuM = 0; for(jj=0; jj<=N; jj++) pnuM = pnu[jj]>pnuM ? pnu[jj] : pnuM; int cnuM = 0; for(jj=0; jj<=N; jj++) cnuM = cnu[jj]>cnuM ? cnu[jj] : cnuM; /* align work space */ size_t align = 64; size_t addr = (size_t) ptr_int; size_t offset = addr % align; ptr_int = ptr_int + offset / sizeof(int); ptr = (double *) ptr_int; // initialize work space double *(pL[N]); double *pK; double *(pP[N+1]); double *(dux[N+1]); double *(dpi[N+1]); double *(Pb[N]); double *(pd[N+1]); double *(pl[N+1]); double *(bd[N+1]); double *(bl[N+1]); double *(dlam[N+1]); double *(dt[N+1]); double *(lamt[N+1]); double *(t_inv[N+1]); double *work; // ptr += (N+1)*(pnx + pnz*cnl + 12*pnz) + 3*pnz; // hpL for(jj=0; jj<N; jj++) { pL[jj] = ptr; ptr += (pnu[jj]+pnx[jj])*cnu[jj]; } // pK pK = ptr; ptr += pnxM*cnuM; // hpP for(jj=0; jj<=N; jj++) { pP[jj] = ptr; ptr += pnx[jj]*cnx[jj]; } // inputs and states for(jj=0; jj<=N; jj++) { dux[jj] = ptr; ptr += anu[jj]+anx[jj]; } // equality constr multipliers for(jj=0; jj<=N; jj++) { dpi[jj] = ptr; ptr += anx[jj]; } // backup of P*b for(jj=0; jj<N; jj++) { Pb[jj] = ptr; ptr += anx[jj+1]; } // Hessian for(jj=0; jj<=N; jj++) { pd[jj] = ptr; //pQ[jj]; pl[jj] = ptr + 1*(pnb[jj]); bd[jj] = ptr + 2*(pnb[jj]); bl[jj] = ptr + 3*(pnb[jj]); ptr += 4*(pnb[jj]); // backup //for(ll=0; ll<nu[jj]; ll++) // bd[jj][ll] = pR[jj][(ll/bs)*bs*cnu[jj]+ll%bs+ll*bs]; //for(ll=0; ll<nx[jj]; ll++) // bd[jj][nu[jj]+ll] = pQ[jj][(ll/bs)*bs*cnx[jj]+ll%bs+ll*bs]; for(ll=0; ll<nb[jj] && idxb[jj][ll]<nu[jj]; ll++) { idx = idxb[jj][ll]; bd[jj][ll] = pR[jj][idx/bs*bs*cnu[jj]+idx%bs+idx*bs]; bl[jj][ll] = rq[jj][idx]; } for(; ll<nb[jj]; ll++) { idx = idxb[jj][ll] - nu[jj]; bd[jj][ll] = pQ[jj][idx/bs*bs*cnx[jj]+idx%bs+idx*bs]; bl[jj][ll] = rq[jj][idx]; } //d_print_mat(1, nb[jj], bd[jj], 1); } //exit(1); // slack variables, Lagrangian multipliers for inequality constraints and work space for(jj=0; jj<=N; jj++) { dlam[jj] = ptr; dt[jj] = ptr + 2*pnb[jj]; ptr += 4*pnb[jj]; } for(jj=0; jj<=N; jj++) { lamt[jj] = ptr; ptr += 2*pnb[jj]; } for(jj=0; jj<=N; jj++) { t_inv[jj] = ptr; ptr += 2*pnb[jj]; } work = ptr; ptr += pnxM + pnuM; double temp0, temp1; double alpha, mu, mu_aff; double mu_scal = 0.0; for(jj=0; jj<=N; jj++) mu_scal += nb[jj]; mu_scal = 0.5/mu_scal; double sigma, sigma_decay, sigma_min; sigma = sigma_par[0]; //0.4; sigma_decay = sigma_par[1]; //0.3; sigma_min = sigma_par[2]; //0.01; // initialize ux & t>0 (slack variable) d_init_var_diag_mpc(N, nx, nu, nb, idxb, ux, pi, d, t, lam, mu0, warm_start); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], ux[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], lam[ii], 1); exit(1); #endif // initialize pi for(jj=0; jj<=N; jj++) for(ll=0; ll<nx[jj]; ll++) dpi[jj][ll] = 0.0; // initialize dux for(ll=0; ll<nx[0]; ll++) dux[0][nu[0]+ll] = ux[0][nu[0]+ll]; // compute the duality gap //alpha = 0.0; // needed to compute mu !!!!! //d_compute_mu_hard_mpc(N, nx, nu, nb, &mu, mu_scal, alpha, lam, dlam, t, dt); mu = mu0; // set to zero iteration count *kk = 0; // larger than minimum accepted step size alpha = 1.0; // update hessian in Riccati routine const int update_hessian = 1; //int fast_rsqrt = 0; // IP loop while( *kk<k_max && mu>mu_tol && alpha>=alpha_min ) { //update cost function matrices and vectors (box constraints) d_update_hessian_diag_mpc(N, nx, nu, nb, 0.0, t, t_inv, lam, lamt, dlam, bd, bl, pd, pl, d); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t_inv[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], lam[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], lamt[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], dlam[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], bd[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pd[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], bl[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); if(*kk==1) exit(1); #endif #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pd[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); //if(*kk==1) exit(1); #endif // update hessian & jacobian for(jj=0; jj<=N; jj++) { for(ll=0; ll<nb[jj] && idxb[jj][ll]<nu[jj]; ll++) { idx = idxb[jj][ll]; pR[jj][idx/bs*bs*cnu[jj]+idx%bs+idx*bs] = pd[jj][ll]; rq[jj][idx] = pl[jj][ll]; } for(; ll<nb[jj]; ll++) { idx = idxb[jj][ll] - nu[jj]; pQ[jj][idx/bs*bs*cnx[jj]+idx%bs+idx*bs] = pd[jj][ll]; idx = idxb[jj][ll]; rq[jj][idx] = pl[jj][ll]; } } #if 0 for(ii=0; ii<N; ii++) d_print_pmat(nu[ii], nu[ii], bs, pR[ii], cnu[ii]); for(ii=0; ii<=N; ii++) d_print_pmat(nx[ii], nx[ii], bs, pQ[ii], cnx[ii]); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], rq[ii], 1); exit(1); #endif // compute the search direction: factorize and solve the KKT system //printf("\n%d %f\n", fast_rsqrt, mu); d_ric_diag_trf_mpc(N, nx, nu, dA, pBt, pR, pSt, pQ, pL, pK, pP, work); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nx[ii], nx[ii], bs, pP[ii], cnx[ii]); #endif d_ric_diag_trs_mpc(N, nx, nu, dA, pBt, pL, pP, b, rq, dux, 1, Pb, compute_mult, dpi, work); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(pnu[ii]+pnx[ii], cnu[ii], bs, pL[ii], cnu[ii]); exit(1); #endif #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], dux[ii], 1); if(*kk==1) exit(1); #endif #if 1 // compute t_aff & dlam_aff & dt_aff & alpha for(jj=0; jj<=N; jj++) for(ll=0; ll<2*nb[jj]; ll++) dlam[jj][ll] = 0.0; alpha = 1.0; d_compute_alpha_diag_mpc(N, nx, nu, nb, idxb, &alpha, t, dt, lam, dlam, lamt, dux, d); stat[5*(*kk)] = sigma; stat[5*(*kk)+1] = alpha; alpha *= 0.995; //printf("\nalpha = %f\n", alpha); // compute the affine duality gap d_compute_mu_diag_mpc(N, nx, nu, nb, &mu_aff, mu_scal, alpha, lam, dlam, t, dt); stat[5*(*kk)+2] = mu_aff; //printf("\nmu = %f\n", mu_aff); //mu_aff = 1.346982; // TODO remove !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // compute sigma sigma = mu_aff/mu; sigma = sigma*sigma*sigma; // if(sigma<sigma_min) // sigma = sigma_min; //printf("\n%f %f %f %f\n", mu_aff, mu, sigma, mu_scal); //exit(1); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], dt[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], dlam[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t_inv[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); //exit(1); #endif d_update_gradient_diag_mpc(N, nx, nu, nb, sigma*mu, dt, dlam, t_inv, pl); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); if(*kk==1) exit(1); #endif // update jacobian for(jj=0; jj<=N; jj++) { for(ll=0; ll<nb[jj] && idxb[jj][ll]<nu[jj]; ll++) { idx = idxb[jj][ll]; rq[jj][idx] = pl[jj][ll]; } for(; ll<nb[jj]; ll++) { idx = idxb[jj][ll]; rq[jj][idx] = pl[jj][ll]; } } // solve the system d_ric_diag_trs_mpc(N, nx, nu, dA, pBt, pL, pP, b, rq, dux, 0, Pb, compute_mult, dpi, work); //d_ric_trs_mpc(nx, nu, N, pBAbt, pL, pl, dux, work, 1, Pb, compute_mult, dpi, nb, ng, ngN, pDCt, qx); #endif #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], dux[ii], 1); if(*kk==1) exit(1); #endif // compute t & dlam & dt & alpha alpha = 1.0; d_compute_alpha_diag_mpc(N, nx, nu, nb, idxb, &alpha, t, dt, lam, dlam, lamt, dux, d); //printf("\n%f\n", alpha); //exit(1); stat[5*(*kk)] = sigma; stat[5*(*kk)+3] = alpha; alpha *= 0.995; // update x, u, lam, t & compute the duality gap mu d_update_var_diag_mpc(N, nx, nu, nb, &mu, mu_scal, alpha, ux, dux, t, dt, lam, dlam, pi, dpi); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], ux[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], lam[ii], 1); exit(1); #endif stat[5*(*kk)+4] = mu; // update sigma // sigma *= sigma_decay; // if(sigma<sigma_min) // sigma = sigma_min; // if(alpha<0.3) // sigma = sigma_par[0]; #if 0 d_print_mat(1, 2*pnb+2*png, lam[0], 1); d_print_mat(1, 2*pnb+2*png, lam[1], 1); d_print_mat(1, 2*pnb+2*png, lam[N], 1); d_print_mat(1, 2*pnb+2*png, t[0], 1); d_print_mat(1, 2*pnb+2*png, t[1], 1); d_print_mat(1, 2*pnb+2*png, t[N], 1); printf("\n%f\n", mu); exit(1); #endif //mu = 13.438997; // increment loop index (*kk)++; } // end of IP loop #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], rq[ii], 1); #endif // restore Hessian //for(jj=0; jj<=N; jj++) // { // for(ll=0; ll<nu[jj]; ll++) // pR[jj][(ll/bs)*bs*cnu[jj]+ll%bs+ll*bs] = bd[jj][ll]; // for(ll=0; ll<nx[jj]; ll++) // pQ[jj][(ll/bs)*bs*cnx[jj]+ll%bs+ll*bs] = bd[jj][nu[jj]+ll]; // } for(jj=0; jj<=N; jj++) { for(ll=0; ll<nb[jj] && idxb[jj][ll]<nu[jj]; ll++) { idx = idxb[jj][ll]; pR[jj][idx/bs*bs*cnu[jj]+idx%bs+idx*bs] = bd[jj][ll]; rq[jj][idx] = bl[jj][ll]; } for(; ll<nb[jj]; ll++) { idx = idxb[jj][ll] - nu[jj]; pQ[jj][idx/bs*bs*cnx[jj]+idx%bs+idx*bs] = bd[jj][ll]; idx = idxb[jj][ll]; rq[jj][idx] = bl[jj][ll]; } } #if 0 for(ii=0; ii<N; ii++) d_print_pmat(nu[ii], nu[ii], bs, pR[ii], cnu[ii]); for(ii=0; ii<=N; ii++) d_print_pmat(nx[ii], nx[ii], bs, pQ[ii], cnx[ii]); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], bl[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], rq[ii], 1); exit(1); #endif // successful exit if(mu<=mu_tol) return 0; // max number of iterations reached if(*kk>=k_max) return 1; // no improvement if(alpha<alpha_min) return 2; // impossible return -1; } // end of ipsolver
/* primal-dual interior-point method, hard constraints, time variant matrices, time variant size (mpc version) */ int d_ip2_hard_mpc_tv(int *kk, int k_max, double mu0, double mu_tol, double alpha_min, int warm_start, double *sigma_par, double *stat, int N, int *nx, int *nu, int *nb, int **idxb, int *ng, double **pBAbt, double **pQ, double **pDCt, double **d, double **ux, int compute_mult, double **pi, double **lam, double **t, double *double_work_memory, int *int_work_memory) { // indeces int jj, ll, ii, bs0; // constants const int bs = D_MR; const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line // matrices size // work_space_int_size_per_stage = 7 int idx; int nxM = 0; int nzM = 0; int ngM = 0; int *ptr_int, *anx, *anz, *pnz, *pnb, *png, *cnx, *cnz; ptr_int = int_work_memory; // no alignmenr requirements anx = ptr_int; ptr_int += N+1; anz = ptr_int; ptr_int += N+1; pnz = ptr_int; ptr_int += N+1; pnb = ptr_int; ptr_int += N+1; png = ptr_int; ptr_int += N+1; cnx = ptr_int; ptr_int += N+1; cnz = ptr_int; ptr_int += N+1; for(jj=0; jj<=N; jj++) { anx[jj] = (nx[jj]+nal-1)/nal*nal; anz[jj] = (nu[jj]+nx[jj]+1+nal-1)/nal*nal; pnz[jj] = (nu[jj]+nx[jj]+1+bs-1)/bs*bs; pnb[jj] = (nb[jj]+bs-1)/bs*bs; png[jj] = (ng[jj]+bs-1)/bs*bs; cnx[jj] = (nx[jj]+ncl-1)/ncl*ncl; cnz[jj] = (nu[jj]+nx[jj]+1+ncl-1)/ncl*ncl; if(nx[jj]>nxM) nxM = nx[jj]; if(nu[jj]+nx[jj]+1>nzM) nzM = nu[jj]+nx[jj]+1; if(ng[jj]>ngM) ngM = ng[jj]; } // initialize work space // work_space_double_size_per_stage = pnz*cnl + 2*anz + 2*anx + 14*pnb + 10*png // work_space_double_size_const_max = pnz*cnxg + pnz double *ptr; ptr = double_work_memory; // supposed to be aligned to cache line boundaries double *(pL[N+1]); double *(l[N+1]); double *work; double *(q[N+1]); double *(dux[N+1]); double *(dpi[N+1]); double *(pd[N+1]); // pointer to diagonal of Hessian double *(pl[N+1]); // pointer to linear part of Hessian double *(bd[N+1]); // backup diagonal of Hessian double *(bl[N+1]); // backup linear part of Hessian double *diag; double *(dlam[N+1]); double *(dt[N+1]); double *(lamt[N+1]); double *(t_inv[N+1]); double *(Qx[N+1]); double *(qx[N+1]); double *(qx2[N+1]); double *(Pb[N]); // work space for(jj=0; jj<=N; jj++) { pL[jj] = ptr; ptr += pnz[jj] * ( cnx[jj]+ncl>cnz[jj] ? cnx[jj]+ncl : cnz[jj] ); // pnz*cnl } for(jj=0; jj<=N; jj++) { l[jj] = ptr; ptr += anz[jj]; } work = ptr; ptr += ((nzM+bs-1)/bs*bs) * ((nxM+ngM+ncl-1)/ncl*ncl); // pnzM*cnxgM // inputs and states for(jj=0; jj<=N; jj++) { dux[jj] = ptr; ptr += anz[jj]; } // equality constr multipliers for(jj=0; jj<=N; jj++) { dpi[jj] = ptr; ptr += anx[jj]; } // backup of P*b for(jj=0; jj<N; jj++) { Pb[jj] = ptr; ptr += anx[jj+1]; } // linear part of cost function for(jj=0; jj<=N; jj++) { q[jj] = ptr; ptr += anz[jj]; for(ll=0; ll<nu[jj]+nx[jj]; ll++) q[jj][ll] = pQ[jj][(nu[jj]+nx[jj])/bs*bs*cnz[jj]+(nu[jj]+nx[jj])%bs+ll*bs]; } // Hessian backup for(jj=0; jj<=N; jj++) { pd[jj] = ptr; pl[jj] = ptr + pnb[jj]; bd[jj] = ptr + 2*pnb[jj]; bl[jj] = ptr + 3*pnb[jj]; ptr += 4*pnb[jj]; // backup for(ll=0; ll<nb[jj]; ll++) { idx = idxb[jj][ll]; bd[jj][ll] = pQ[jj][idx/bs*bs*cnz[jj]+idx%bs+idx*bs]; bl[jj][ll] = q[jj][idx]; } } diag = ptr; ptr += (nzM+bs-1)/bs*bs; // pnzM // slack variables, Lagrangian multipliers for inequality constraints and work space for(jj=0; jj<=N; jj++) { dlam[jj] = ptr; dt[jj] = ptr + 2*pnb[jj]+2*png[jj]; ptr += 4*pnb[jj]+4*png[jj]; } for(jj=0; jj<=N; jj++) { lamt[jj] = ptr; ptr += 2*pnb[jj]+2*png[jj]; } for(jj=0; jj<=N; jj++) { t_inv[jj] = ptr; ptr += 2*pnb[jj]+2*png[jj]; } for(jj=0; jj<=N; jj++) { Qx[jj] = ptr; qx[jj] = ptr+pnb[jj]+png[jj]; qx2[jj] = ptr+2*pnb[jj]+2*png[jj]; ptr += 3*pnb[jj]+3*png[jj]; } double temp0, temp1; double alpha, mu, mu_aff; double mu_scal = 0.0; for(jj=0; jj<=N; jj++) mu_scal += 2*nb[jj] + 2*ng[jj]; //printf("\nmu_scal = %f\n", mu_scal); mu_scal = 1.0 / mu_scal; //printf("\nmu_scal = %f\n", mu_scal); double sigma, sigma_decay, sigma_min; //for(ii=0; ii<=N; ii++) // printf("\n%d %d\n", nb[ii], ng[ii]); //exit(1); sigma = sigma_par[0]; //0.4; sigma_decay = sigma_par[1]; //0.3; sigma_min = sigma_par[2]; //0.01; // initialize ux & t>0 (slack variable) d_init_var_hard_mpc_tv(N, nx, nu, nb, idxb, ng, ux, pi, pDCt, d, t, lam, mu0, warm_start); // initialize pi for(jj=0; jj<=N; jj++) for(ll=0; ll<nx[jj]; ll++) dpi[jj][ll] = 0.0; // initialize dux for(ll=0; ll<nx[0]; ll++) dux[0][nu[0]+ll] = ux[0][nu[0]+ll]; // compute the duality gap //alpha = 0.0; // needed to compute mu !!!!! //d_compute_mu_hard_mpc(N, nx, nu, nb, &mu, mu_scal, alpha, lam, dlam, t, dt); mu = mu0; // set to zero iteration count *kk = 0; // larger than minimum accepted step size alpha = 1.0; // update hessian in Riccati routine const int update_hessian = 1; int fast_rsqrt = 0; // IP loop while( *kk<k_max && mu>mu_tol && alpha>=alpha_min ) { //update cost function matrices and vectors (box constraints) d_update_hessian_hard_mpc_tv(N, nx, nu, nb, ng, 0.0, t, t_inv, lam, lamt, dlam, Qx, qx, qx2, bd, bl, pd, pl, d); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pd[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, ng[ii], Qx[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, ng[ii], qx[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, ng[ii], qx2[ii], 1); if(*kk==1) exit(1); #endif // compute the search direction: factorize and solve the KKT system #if defined(FAST_RSQRT) if(mu>1e-2) fast_rsqrt = 2; else { if(mu>1e-4) fast_rsqrt = 1; else fast_rsqrt = 0; } #else fast_rsqrt = 0; #endif //printf("\n%d %f\n", fast_rsqrt, mu); d_ric_sv_mpc_tv(N, nx, nu, pBAbt, pQ, dux, pL, work, diag, 1, Pb, compute_mult, dpi, nb, idxb, pd, pl, ng, pDCt, Qx, qx2, fast_rsqrt); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nu[ii]+nx[ii]+1, nu[ii]+nx[ii]+1, bs, pQ[ii], cnz[ii]); //exit(1); #endif #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(pnz[ii], cnz[ii], bs, pL[ii], cnz[ii]); //exit(1); #endif #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], dux[ii], 1); if(*kk==1) exit(1); #endif #if 1 // compute t_aff & dlam_aff & dt_aff & alpha alpha = 1.0; d_compute_alpha_hard_mpc_tv(N, nx, nu, nb, idxb, ng, &alpha, t, dt, lam, dlam, lamt, dux, pDCt, d); stat[5*(*kk)] = sigma; stat[5*(*kk)+1] = alpha; alpha *= 0.995; //printf("\nalpha = %f\n", alpha); // compute the affine duality gap d_compute_mu_hard_mpc_tv(N, nx, nu, nb, ng, &mu_aff, mu_scal, alpha, lam, dlam, t, dt); stat[5*(*kk)+2] = mu_aff; //printf("\nmu = %f\n", mu_aff); // compute sigma sigma = mu_aff/mu; sigma = sigma*sigma*sigma; // if(sigma<sigma_min) // sigma = sigma_min; //printf("\n%f %f %f %f\n", mu_aff, mu, sigma, mu_scal); //exit(1); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], dt[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], dlam[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb[ii], t_inv[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); //exit(1); #endif d_update_gradient_hard_mpc_tv(N, nx, nu, nb, ng, sigma*mu, dt, dlam, t_inv, pl, qx); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nb[ii], pl[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, ng[ii], qx[ii], 1); if(*kk==1) exit(1); #endif // copy b into x for(ii=0; ii<N; ii++) for(jj=0; jj<nx[ii+1]; jj++) dux[ii+1][nu[ii+1]+jj] = pBAbt[ii][(nu[ii]+nx[ii])/bs*bs*cnx[ii+1]+(nu[ii]+nx[ii])%bs+bs*jj]; // copy b // solve the system d_ric_trs_mpc_tv(N, nx, nu, pBAbt, pL, q, l, dux, work, 0, Pb, compute_mult, dpi, nb, idxb, pl, ng, pDCt, qx); #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu[ii]+nx[ii], dux[ii], 1); if(*kk==1) exit(1); #endif #endif // compute t & dlam & dt & alpha alpha = 1.0; d_compute_alpha_hard_mpc_tv(N, nx, nu, nb, idxb, ng, &alpha, t, dt, lam, dlam, lamt, dux, pDCt, d); stat[5*(*kk)] = sigma; stat[5*(*kk)+3] = alpha; alpha *= 0.995; // update x, u, lam, t & compute the duality gap mu d_update_var_hard_mpc_tv(N, nx, nu, nb, ng, &mu, mu_scal, alpha, ux, dux, t, dt, lam, dlam, pi, dpi); stat[5*(*kk)+4] = mu; // update sigma /* sigma *= sigma_decay;*/ /* if(sigma<sigma_min)*/ /* sigma = sigma_min;*/ /* if(alpha<0.3)*/ /* sigma = sigma_par[0];*/ // increment loop index (*kk)++; } // end of IP loop // restore Hessian for(jj=0; jj<=N; jj++) { for(ll=0; ll<nb[jj]; ll++) { idx = idxb[jj][ll]; pQ[jj][idx/bs*bs*cnz[jj]+idx%bs+idx*bs] = bd[jj][ll]; pQ[jj][(nu[jj]+nx[jj])/bs*bs*cnz[jj]+(nu[jj]+nx[jj])%bs+idx*bs] = bl[jj][ll]; } } // successful exit if(mu<=mu_tol) return 0; // max number of iterations reached if(*kk>=k_max) return 1; // no improvement if(alpha<alpha_min) return 2; // impossible return -1; } // end of ipsolver
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); int ii, jj, ll; double **dummy; int ** int_dummy; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line int nx, nu, N, nrep; // timing variables float time_ric_diag, time_ric_full, time_ric_full_tv, time_ip_diag, time_ip_full, time_ip_full_tv; /************************************************ * test of riccati eye/diag & size-variant ************************************************/ #if 1 // horizon length N = 11; // base nx and nu int nx0 = 2; int nu0 = 1; // size-varing int nxx[N+1]; for(ii=0; ii<=N; ii++) nxx[ii] = (N+1-ii)*nx0 + nu0; int pnxx[N+1]; for(ii=0; ii<=N; ii++) pnxx[ii] = (nxx[ii]+bs-1)/bs*bs; int cnxx[N+1]; for(ii=0; ii<=N; ii++) cnxx[ii] = (nxx[ii]+ncl-1)/ncl*ncl; int nuu[N+1]; for(ii=0; ii<N; ii++) nuu[ii] = nu0; nuu[N] = 0; // !!!!! int pnuu[N+1]; for(ii=0; ii<N; ii++) pnuu[ii] = (nuu[ii]+bs-1)/bs*bs; pnuu[N] = 0; // !!!!! int cnuu[N+1]; for(ii=0; ii<N; ii++) cnuu[ii] = (nuu[ii]+ncl-1)/ncl*ncl; cnuu[N] = 0; // !!!!! //for(ii=0; ii<=N; ii++) printf("\n%d %d %d\n", nxx[ii], pnxx[ii], cnxx[ii]); //for(ii=0; ii<N; ii++) printf("\n%d %d %d\n", nuu[ii], pnuu[ii], cnuu[ii]); // factorization printf("\nRiccati diag\n\n"); // data memory space double *hdA[N]; double *hpBt[N]; double *hpR[N]; double *hpS[N]; double *hpQ[N+1]; double *hpLK[N]; double *hpP[N+1]; double *pK; for(ii=0; ii<N; ii++) { d_zeros_align(&hdA[ii], pnxx[ii], 1); d_zeros_align(&hpBt[ii], pnuu[ii], cnxx[ii+1]); d_zeros_align(&hpR[ii], pnuu[ii], cnuu[ii]); d_zeros_align(&hpS[ii], pnxx[ii], cnuu[ii]); d_zeros_align(&hpQ[ii], pnxx[ii], cnxx[ii]); d_zeros_align(&hpLK[ii], pnuu[ii]+pnxx[ii], cnuu[ii]); d_zeros_align(&hpP[ii], pnxx[ii], cnxx[ii]); } d_zeros_align(&hpQ[N], pnxx[N], cnxx[N]); d_zeros_align(&hpP[N], pnxx[N], cnxx[N]); d_zeros_align(&pK, pnxx[0], cnuu[0]); // max(nx) x nax(nu) // dA for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hdA[ii][jj] = 1.0; //d_print_mat(1, cnxx[1], hdA[0], 1); // B double *eye_nu0; d_zeros(&eye_nu0, nu0, nu0); for(jj=0; jj<nu0; jj++) eye_nu0[jj*(nu0+1)] = 1.0; double *ptrB = BBB; for(ii=0; ii<N; ii++) { d_cvt_mat2pmat(nuu[ii], nuu[ii], eye_nu0, nuu[ii], 0, hpBt[ii], cnxx[ii+1]); d_cvt_tran_mat2pmat(nxx[ii+1]-nuu[ii], nuu[ii], ptrB, nxx[ii+1]-nuu[ii], 0, hpBt[ii]+nuu[ii]*bs, cnxx[ii+1]); ptrB += nxx[ii+1] - nuu[ii]; } free(eye_nu0); //d_print_pmat(pnuu[0], cnxx[1], bs, hpBt[0], cnxx[0]); //d_print_pmat(pnuu[1], cnxx[2], bs, hpBt[1], cnxx[1]); //d_print_pmat(pnuu[2], cnxx[3], bs, hpBt[2], cnxx[2]); //d_print_pmat(pnuu[N-1], cnxx[N-1], bs, hpBt[N-2], cnxx[N-2]); //d_print_pmat(pnuu[N-1], cnxx[N], bs, hpBt[N-1], cnxx[N-1]); // R // penalty on du for(ii=0; ii<N; ii++) for(jj=0; jj<nuu[ii]; jj++) hpR[ii][jj/bs*bs*cnuu[ii]+jj%bs+jj*bs] = 0.0; //for(ii=0; ii<N; ii++) // d_print_pmat(pnuu[ii], cnuu[ii], bs, hpR[ii], pnuu[ii]); //d_print_pmat(pnuu[0], cnuu[0], bs, hpR[0], pnuu[0]); // S (zero) // Q for(ii=0; ii<=N; ii++) { // penalty on u for(jj=0; jj<nu0; jj++) hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 1.0; // penalty on x // for(jj==1; jj<nxx[ii]-nx0; jj++) // hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 0.0002; for(jj=nxx[ii]-nx0; jj<nxx[ii]; jj++) hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 1.0; } //for(ii=0; ii<=N; ii++) // d_print_pmat(pnxx[ii], cnxx[ii], bs, hpQ2[ii], cnxx[ii]); //d_print_pmat(pnxx[0], cnxx[0], bs, hpQ2[0], cnxx[0]); //d_print_pmat(pnxx[1], cnxx[1], bs, hpQ2[1], cnxx[1]); //d_print_pmat(pnxx[N-1], cnxx[N-1], bs, hpQ2[N-1], cnxx[N-1]); //d_print_pmat(pnxx[N], cnxx[N], bs, hpQ2[N], cnxx[N]); //exit(1); // work space double *diag; d_zeros_align(&diag, pnxx[0]+pnuu[0], 1); // factorization printf("\nfactorization ...\n"); d_ric_diag_trf_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hpLK, pK, hpP, diag); printf("\nfactorization done\n\n"); #if 1 //d_print_pmat(nxx[0], nxx[0], bs, hpP[0], cnxx[0]); //d_print_pmat(nxx[1], nxx[1], bs, hpP[1], cnxx[1]); //d_print_pmat(nxx[N-2], nxx[N-2], bs, hpP[N-2], cnxx[N-2]); //d_print_pmat(nxx[N-1], nxx[N-1], bs, hpP[N-1], cnxx[N-1]); //d_print_pmat(nxx[N], nxx[N], bs, hpP[N], cnxx[N]); //for(ii=0; ii<=N; ii++) // d_print_pmat(pnuu[ii]+nxx[ii], nuu[ii], bs, hpLK[ii], cnuu[ii]); //d_print_pmat(pnuu[0]+nxx[0], nuu[0], bs, hpLK[0], cnuu[0]); //d_print_pmat(pnuu[1]+nxx[1], nuu[1], bs, hpLK[1], cnuu[1]); //d_print_pmat(pnuu[2]+nxx[2], nuu[2], bs, hpLK[2], cnuu[2]); //d_print_pmat(pnuu[N-3]+nxx[N-3], nuu[N-3], bs, hpLK[N-3], cnuu[N-3]); //d_print_pmat(pnuu[N-2]+nxx[N-2], nuu[N-2], bs, hpLK[N-2], cnuu[N-2]); //d_print_pmat(pnuu[N-1]+nxx[N-1], nuu[N-1], bs, hpLK[N-1], cnuu[N-1]); #endif // backward-forward solution // data memory space double *hrq[N+1]; double *hux[N+1]; double *hpi[N+1]; double *hPb[N]; double *hb[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hrq[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hux[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hpi[ii], pnxx[ii], 1); d_zeros_align(&hPb[ii], pnxx[ii+1], 1); d_zeros_align(&hb[ii], pnxx[ii+1], 1); } d_zeros_align(&hrq[N], pnuu[N]+pnxx[N], 1); d_zeros_align(&hux[N], pnuu[N]+pnxx[N], 1); d_zeros_align(&hpi[N], pnxx[N], 1); double *work_diag; d_zeros_align(&work_diag, pnxx[0], 1); for(ii=0; ii<=N; ii++) for(jj=0; jj<nuu[ii]; jj++) hrq[ii][jj] = 0.0; for(ii=0; ii<=N; ii++) for(jj=0; jj<nxx[ii]; jj++) hrq[ii][nuu[ii]+jj] = 0.0; for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hb[ii][jj] = 0.0; // x0 for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); printf("\nbackward-forward solution ...\n"); d_ric_diag_trs_mpc(N, nxx, nuu, hdA, hpBt, hpLK, hpP, hb, hrq, hux, 1, hPb, 1, hpi, work_diag); printf("\nbackward-forward solution done\n\n"); #if 1 printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); #endif // residuals // data memory space double *hres_rq[N+1]; double *hres_b[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hres_rq[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hres_b[ii], pnxx[ii+1], 1); } d_zeros_align(&hres_rq[N], pnuu[N]+pnxx[N], 1); printf("\nresuduals ...\n"); d_res_diag_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hux, hpi, hres_rq, hres_b, work_diag); printf("\nresiduals done\n\n"); #if 1 printf("\nres_q\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); #endif // timing struct timeval tv20, tv21; #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_diag_trf_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hpLK, pK, hpP, diag); d_ric_diag_trs_mpc(N, nxx, nuu, hdA, hpBt, hpLK, hpP, hb, hrq, hux, 1, hPb, 1, hpi, work_diag); } gettimeofday(&tv21, NULL); // start time_ric_diag = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif #if 1 printf("\nRiccati full\n\n"); // size-variant full int nzz[N+1]; for(ii=0; ii<=N; ii++) nzz[ii] = nuu[ii] + nxx[ii] + 1; int pnzz[N+1]; for(ii=0; ii<=N; ii++) pnzz[ii] = (nzz[ii]+bs-1)/bs*bs; int cnzz[N+1]; for(ii=0; ii<=N; ii++) cnzz[ii] = (nzz[ii]+ncl-1)/ncl*ncl; int anzz[N+1]; for(ii=0; ii<=N; ii++) anzz[ii] = (nzz[ii]+nal-1)/nal*nal; int cnll[N+1]; for(ii=0; ii<=N; ii++) cnll[ii] = cnzz[ll]<cnxx[ll]+ncl ? cnxx[ll]+ncl : cnzz[ll]; int nzero[N+1]; for(ii=0; ii<=N; ii++) nzero[ii] = 0; double *hpBAbt_tv[N]; double *hpRSQ_tv[N+1]; double *hpL_tv[N+1]; double *hl[N+1]; for(ii=0; ii<N; ii++) { d_zeros_align(&hpBAbt_tv[ii], pnzz[ii], cnxx[ii+1]); d_zeros_align(&hpRSQ_tv[ii], pnzz[ii], cnzz[ii]); d_zeros_align(&hpL_tv[ii], pnzz[ii], cnll[ii]); d_zeros_align(&hl[ii], anzz[ii], 1); } d_zeros_align(&hpRSQ_tv[N], pnzz[N], cnzz[N]); d_zeros_align(&hpL_tv[N], pnzz[N], cnll[N]); d_zeros_align(&hl[N], anzz[ii], 1); double *work_ric_tv; d_zeros_align(&work_ric_tv, pnzz[0], cnxx[0]); for(ii=0; ii<N; ii++) { d_copy_pmat(nuu[ii], nxx[ii+1], bs, hpBt[ii], cnxx[ii], hpBAbt_tv[ii], cnxx[ii+1]); for(jj=0; jj<nxx[ii+1]; jj++) hpBAbt_tv[ii][(nuu[ii]+jj)/bs*bs*cnxx[ii+1]+(nuu[ii]+jj)%bs+jj*bs] = 1.0; for(jj=0; jj<nxx[ii+1]; jj++) hpBAbt_tv[ii][(nuu[ii]+nxx[ii])/bs*bs*cnxx[ii+1]+(nuu[ii]+nxx[ii])%bs+jj*bs] = hb[ii][jj]; //d_print_pmat(nzz[ii], nxx[ii+1], bs, hpBAbt_tv[ii], cnxx[ii+1]); } for(ii=0; ii<=N; ii++) { // R // penalty on du for(jj=0; jj<nuu[ii]; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 0.0; // Q // penalty on u for(; jj<nuu[ii]+nu0; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 1.0; // penalty on x for(jj=nuu[ii]+nxx[ii]-nx0; jj<nuu[ii]+nxx[ii]; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 1.0; // r q for(jj=0; jj<nuu[ii]+nxx[ii]; jj++) hpRSQ_tv[ii][(nuu[ii]+nxx[ii])/bs*bs*cnzz[ii]+(nuu[ii]+nxx[ii])%bs+jj*bs] = hrq[ii][jj]; //d_print_pmat(nzz[ii], nzz[ii], bs, hpRSQ_tv[ii], cnzz[ii]); } printf("\nfactorization and backward-forward solution ...\n"); d_ric_sv_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpRSQ_tv, hux, hpL_tv, work_ric_tv, diag, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, dummy, nzero, dummy, dummy, dummy, 0); printf("\nfactorization and backward-forward solution done\n\n"); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nzz[ii], nzz[ii], bs, hpL_tv[ii], cnzz[ii]); #endif printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hux[ii+1][nuu[ii+1]+jj] = hb[ii][jj]; printf("\nbackward-forward solution ...\n"); d_ric_trs_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpL_tv, hrq, hl, hux, work_ric_tv, 1, hPb, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, nzero, dummy, dummy); printf("\nbackward-forward solution done\n\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); //exit(1); printf("\nresuduals ...\n"); d_res_diag_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hux, hpi, hres_rq, hres_b, work_diag); printf("\nresiduals done\n\n"); #if 1 printf("\nres_q\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); #endif #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_sv_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpRSQ_tv, hux, hpL_tv, work_ric_tv, diag, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, dummy, nzero, dummy, dummy, dummy, 0); } gettimeofday(&tv21, NULL); // start time_ric_full_tv = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif #endif #if 1 // IPM printf("\nIPM diag\n\n"); int kk = -1; int kmax = 50; double mu0 = 1; double mu_tol = 1e-8; double alpha_min = 1e-12; double sigma_par[] = {0.4, 0.3, 0.01}; double stat[5*50] = {}; int nbb[N+1]; nbb[0] = nu0;//nuu[0]; // XXX !!!!!!!!!!!!!! for(ii=1; ii<N; ii++) nbb[ii] = 2*nu0 + nx0; //nuu[ii] + nxx[ii]; nbb[N] = nu0 + nx0; int *(idxb[N+1]); for(ii=0; ii<=N; ii++) { idxb[ii] = (int *) malloc(nbb[ii]*sizeof(int)); } int pnbb[N+1]; for(ii=0; ii<=N; ii++) pnbb[ii] = (nbb[ii]+bs-1)/bs*bs; // data memory space double *hd[N+1]; double *hlam[N+1]; double *ht[N+1]; double *hres_d[N+1]; for(ii=0; ii<=N; ii++) { d_zeros_align(&hd[ii], 2*pnbb[ii], 1); d_zeros_align(&hlam[ii], 2*pnbb[ii], 1); d_zeros_align(&ht[ii], 2*pnbb[ii], 1); d_zeros_align(&hres_d[ii], 2*pnbb[ii], 1); } double mu = -1; //printf("\nbounds\n"); ii = 0; // initial stage ll = 0; for(jj=0; jj<nuu[ii]; jj++) { hd[ii][ll] = -20.5; hd[ii][pnbb[ii]+ll] = -20.5; idxb[ii][ll] = jj; ll++; } //d_print_mat(1, 2*pnbb[ii], hd[ii], 1); for(ii=1; ii<=N; ii++) { ll = 0; for(jj=0; jj<nuu[ii]; jj++) { hd[ii][ll] = -20.5; hd[ii][pnbb[ii]+ll] = -20.5; idxb[ii][ll] = jj; ll++; } for(; jj<nuu[ii]+nu0; jj++) { hd[ii][ll] = - 2.5; // -2.5 hd[ii][pnbb[ii]+ll] = -10.0; // -10 idxb[ii][ll] = jj; ll++; } //for(; jj<nbb[ii]-nx0; jj++) //for(; jj<nbb[ii]; jj++) //{ //hd[ii][jj] = -100.0; //hd[ii][pnbb[ii]+jj] = -100.0; //idxb[ii][ll] = jj; //ll++; //} jj += nx0*(N-ii); hd[ii][ll+0] = - 0.0; // 0 hd[ii][pnbb[ii]+ll+0] = -20.0; // -20 idxb[ii][ll] = jj; ll++; jj++; hd[ii][ll+0] = -10.0; // -10 hd[ii][pnbb[ii]+ll+0] = -10.0; // -10 idxb[ii][ll] = jj; ll++; jj++; //d_print_mat(1, 2*pnbb[ii], hd[ii], 1); } #if 0 for(ii=0; ii<=N; ii++) { for(jj=0; jj<nbb[ii]; jj++) printf("%d\t", idxb[ii][jj]); printf("\n"); } exit(1); #endif for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); int pnxM = pnxx[0]; int pnuM = pnuu[0]; int cnuM = cnuu[0]; int anxx[N+1]; for(ii=0; ii<=N; ii++) anxx[ii] = (nxx[ii]+nal-1)/nal*nal; int anuu[N+1]; for(ii=0; ii<=N; ii++) anuu[ii] = (nuu[ii]+nal-1)/nal*nal; int work_space_ip_double = 0; for(ii=0; ii<=N; ii++) work_space_ip_double += anuu[ii] + 3*anxx[ii] + (pnuu[ii]+pnxx[ii])*cnuu[ii] + pnxx[ii]*cnxx[ii] + 12*pnbb[ii]; work_space_ip_double += pnxM*cnuM + pnxM + pnuM; int work_space_ip_int = (N+1)*7*sizeof(int); work_space_ip_int = (work_space_ip_int+63)/64*64; work_space_ip_int /= sizeof(int); printf("\nIPM diag work space size: %d double + %d int\n\n", work_space_ip_double, work_space_ip_int); double *work_space_ip; d_zeros_align(&work_space_ip, work_space_ip_double+(work_space_ip_int+1)/2, 1); // XXX assume sizeof(double) = 2 * sizeof(int) !!!!! printf("\nIPM solution ...\n"); d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); printf("\nIPM solution done\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); printf("\nlam\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hlam[ii], 1); d_print_mat(1, nbb[ii], hlam[ii]+pnbb[ii], 1); } printf("\nt\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], ht[ii], 1); d_print_mat(1, nbb[ii], ht[ii]+pnbb[ii], 1); } printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); // residuals printf("\nresuduals IPM ...\n"); d_res_ip_diag_mpc(N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hd, hux, hpi, hlam, ht, hres_rq, hres_b, hres_d, &mu, work_diag); printf("\nresiduals IPM done\n"); printf("\nres_rq\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); printf("\nres_d\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hres_d[ii], 1); d_print_mat(1, nbb[ii], hres_d[ii]+pnbb[ii], 1); } printf("\nres_mu\n"); d_print_mat(1, 1, &mu, 1); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_diag = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); // simulation printf("\nsimulation ...\n\n"); nrep = 15; for(ii=0; ii<nrep; ii++) { d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); dgemv_t_lib(nuu[0], nxx[0], hpBt[0], cnxx[0], hux[0], hux[0]+nuu[0], 1); for(jj=0; jj<nxx[0]-nx0-nu0; jj++) hux[0][nuu[0]+nxx[0]-jj-1] = hux[0][nuu[0]+nxx[0]-jj-1-nx0]; printf("\nsimulation step = %d, IPM iterations = %d, mu = %e\n\n", ii, kk, stat[5*(kk-1)+4]); d_print_mat(1, nuu[0]+nxx[0], hux[0], 1); } printf("\nsimulation done\n\n"); //exit(1); #if 1 // IPM printf("\nIPM full\n\n"); int ngg[N+1]; for(ii=0; ii<=N; ii++) ngg[ii] = 0; int pngg[N+1]; for(ii=0; ii<=N; ii++) pngg[ii] = (ngg[ii]+bs-1)/bs*bs; //int pnzM = pnzz[0]; // max //int cnxgM = cnxx[0]; // max //int work_space_int_size = 7*(N+1); //int work_space_double_size = pnzM*cnxgM + pnzM; //for(ii=0; ii<=N; ii++) // work_space_double_size += pnzz[ii]*cnll[ii] + 3*anzz[ii] + 2*anxx[ii] + 14*pnbb[ii] + 10*pngg[ii]; //printf("\nIPM diag work space size: %d double + %d int\n\n", work_space_double_size, work_space_int_size); //double *work_ipm_tv_double; d_zeros_align(&work_ipm_tv_double, work_space_double_size, 1); double *work_ipm_tv_double; d_zeros_align(&work_ipm_tv_double, d_ip2_hard_mpc_tv_work_space_size_double(N, nxx, nuu, nbb, ngg), 1); //int *work_ipm_tv_int = (int *) malloc(work_space_int_size*sizeof(int)); int *work_ipm_tv_int = (int *) malloc(d_ip2_hard_mpc_tv_work_space_size_int(N, nxx, nuu, nbb, ngg)*sizeof(int)); for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); printf("\nIPM solution ...\n"); d_ip2_hard_mpc_tv(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, dummy, hd, hux, 1, hpi, hlam, ht, work_ipm_tv_double, work_ipm_tv_int); printf("\nIPM solution done\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); printf("\nlam\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hlam[ii], 1); d_print_mat(1, nbb[ii], hlam[ii]+pnbb[ii], 1); } printf("\nt\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], ht[ii], 1); d_print_mat(1, nbb[ii], ht[ii]+pnbb[ii], 1); } printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); printf("\nresiduals ...\n\n"); d_res_ip_hard_mpc_tv(N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, hrq, hux, dummy, hd, hpi, hlam, ht, hres_rq, hres_b, hres_d, &mu); printf("\nresiduals dones\n\n"); printf("\nres_rq\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); printf("\nres_d\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hres_d[ii], 1); d_print_mat(1, nbb[ii], hres_d[ii]+pnbb[ii], 1); } printf("\nres_mu\n"); d_print_mat(1, 1, &mu, 1); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_hard_mpc_tv(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, dummy, hd, hux, 1, hpi, hlam, ht, work_ipm_tv_double, work_ipm_tv_int); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_full_tv = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); free(work_ric_tv); free(work_ipm_tv_double); free(work_ipm_tv_int); for(ii=0; ii<N; ii++) { free(hpBAbt_tv[ii]); free(hpRSQ_tv[ii]); free(hpL_tv[ii]); free(hl[ii]); } free(hpRSQ_tv[N]); free(hpL_tv[N]); free(hl[N]); //exit(1); #endif // free memory for(ii=0; ii<=N; ii++) { free(idxb[ii]); free(hd[ii]); free(hlam[ii]); free(ht[ii]); } free(work_space_ip); #endif for(ii=0; ii<N; ii++) { free(hdA[ii]); free(hpBt[ii]); free(hpR[ii]); free(hpS[ii]); free(hpQ[ii]); free(hpLK[ii]); free(hpP[ii]); free(hrq[ii]); free(hux[ii]); free(hpi[ii]); free(hPb[ii]); free(hb[ii]); free(hres_rq[ii]); free(hres_b[ii]); } free(hpQ[N]); free(hpP[N]); free(pK); free(hrq[N]); free(hux[N]); free(hpi[N]); free(work_diag); free(hres_rq[N]); /************************************************ * test of normal riccati & IPM ************************************************/ printf("\nRiccati full\n\n"); nx = 25; nu = 1; N = 11; int rep; int nz = nx+nu+1; int anz = nal*((nz+nal-1)/nal); int anx = nal*((nx+nal-1)/nal); int pnz = bs*((nz+bs-1)/bs); int pnx = bs*((nx+bs-1)/bs); int pnu = bs*((nu+bs-1)/bs); int cnz = ncl*((nx+nu+1+ncl-1)/ncl); int cnx = ncl*((nx+ncl-1)/ncl); int cnu = ncl*((nu+ncl-1)/ncl); int cnl = cnz<cnx+ncl ? cnx+ncl : cnz; const int ncx = nx; #if 1 double *BAb_temp; d_zeros(&BAb_temp, nx, nu+nx+1); double *hpBAbt2[N]; ptrB = BBB; for(ii=0; ii<N; ii++) { //printf("\n%d\n", ii); d_zeros_align(&hpBAbt2[ii], pnz, cnx); for(jj=0; jj<nx*(nx+nu+1); jj++) BAb_temp[jj] = 0.0; for(jj=0; jj<nu; jj++) BAb_temp[jj*(nx+1)] = 1.0; d_copy_mat(nxx[ii+1]-1, nuu[ii], ptrB, nxx[ii+1]-1, BAb_temp+1, nx); ptrB += nxx[ii+1]-1; for(jj=0; jj<nxx[ii+1]; jj++) BAb_temp[nuu[ii]*nx+jj*(nx+1)] = 1.0; //for(jj=0; jj<nxx[ii+1]; jj++) BAb_temp[(nuu[ii]+nxx[ii+1])*nx+jj] = 1.0; //d_print_mat(nx, nu+nx+1, BAb_temp, nx); d_cvt_tran_mat2pmat(nx, nx+nu+1, BAb_temp, nx, 0, hpBAbt2[ii], cnx); //d_print_pmat(nx+nu+1, nx, bs, hpBAbt2[ii], cnx); } double *RSQ; d_zeros(&RSQ, nz, nz); double *hpRSQ[N+1]; for(ii=0; ii<=N; ii++) { //printf("\n%d\n", ii); d_zeros_align(&hpRSQ[ii], pnz, cnz); for(jj=0; jj<nz*nz; jj++) RSQ[jj] = 0.0; for(jj=nu; jj<2*nu; jj++) RSQ[jj*(nz+1)] = 1.0; for(jj=nu+nxx[ii]-nx0; jj<nu+nxx[ii]; jj++) RSQ[jj*(nz+1)] = 1.0; d_cvt_mat2pmat(nz, nz, RSQ, nz, 0, hpRSQ[ii], cnz); //d_print_pmat(nz, nz, bs, hpRSQ[ii], cnz); } double *hpL[N+1]; double *hq2[N+1]; double *hux2[N+1]; double *hpi2[N+1]; double *hPb2[N]; for(jj=0; jj<N; jj++) { d_zeros_align(&hq2[jj], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpL[jj], pnz, cnl); d_zeros_align(&hux2[jj], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpi2[jj], pnx, 1); d_zeros_align(&hPb2[jj], pnx, 1); } d_zeros_align(&hpL[N], pnz, cnl); d_zeros_align(&hq2[N], pnz, 1); // it has to be pnz !!! d_zeros_align(&hux2[N], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpi2[N], pnx, 1); //double *work; d_zeros_align(&work, 2*anz, 1); double *work; d_zeros_align(&work, pnz, cnx); for(jj=0; jj<nx+nu; jj++) hux2[0][jj] = 0.0; for(jj=0; jj<nu; jj++) { hux2[0][nu+jj] = 7.5097; } for(; jj<nx; jj+=2) { hux2[0][nu+jj+0] = 15.01940; hux2[0][nu+jj+1] = 0.0; } printf("\nfactorization and backward-forward solution ...\n"); d_ric_sv_mpc(nx, nu, N, hpBAbt2, hpRSQ, 0, dummy, dummy, hux2, hpL, work, diag, COMPUTE_MULT, hpi2, 0, 0, 0, dummy, dummy, dummy, 0); printf("\nfactorization and backward-forward solution done\n\n"); //for(ii=0; ii<=N; ii++) // d_print_pmat(pnz, cnl-3, bs, hpL[ii], cnl); //d_print_pmat(pnz, nu, bs, hpL[0], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[1], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[2], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-3], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-2], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-1], cnl); //d_print_pmat(pnz, cnl, bs, hpL[N], cnl); #if 1 printf("\nux Riccati full\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, hux2[ii], 1); #endif // residuals double *hres_rq2[N+1]; double *hres_b2[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hres_rq2[ii], pnz, 1); d_zeros_align(&hres_b2[ii], pnx, 1); } d_zeros_align(&hres_rq2[N], pnz, 1); printf("\nresuduals ...\n"); d_res_mpc(nx, nu, N, hpBAbt2, hpRSQ, hq2, hux2, hpi2, hres_rq2, hres_b2); printf("\nresiduals done\n\n"); printf("\nres_q full\n"); d_print_mat(1, nu, hres_rq2[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nx+nu, hres_rq2[ii], 1); printf("\nres_b full\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx, hres_b2[ii], 1); // timing //struct timeval tv20, tv21; #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_sv_mpc(nx, nu, N, hpBAbt2, hpRSQ, 0, dummy, dummy, hux2, hpL, work, diag, COMPUTE_MULT, hpi2, 0, 0, 0, dummy, dummy, dummy, 0); } gettimeofday(&tv21, NULL); // start time_ric_full = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif printf("\nIPM full\n\n"); int nb = nu+nx; int ng = 0; int ngN = 0; int pnb = (nb+bs-1)/bs*bs; int png = (ng+bs-1)/bs*bs; int pngN = (ngN+bs-1)/bs*bs; double *hd2[N+1]; double *hlam2[N+1]; double *ht2[N+1]; for(ii=0; ii<N; ii++) { d_zeros_align(&hd2[ii], 2*pnb+2*png, 1); d_zeros_align(&hlam2[ii],2*pnb+2*png, 1); d_zeros_align(&ht2[ii], 2*pnb+2*png, 1); } d_zeros_align(&hd2[N], 2*pnb+2*pngN, 1); d_zeros_align(&hlam2[N],2*pnb+2*pngN, 1); d_zeros_align(&ht2[N], 2*pnb+2*pngN, 1); // work space // more than enought !!!!! double *work_ipm_full; d_zeros_align(&work_ipm_full, hpmpc_ip_hard_mpc_dp_work_space(N, nx, nu, nb, ng, ngN), 1); // bounds for(ii=0; ii<=N; ii++) { for(jj=0; jj<nu; jj++) { hd2[ii][jj] = -20.5; hd2[ii][pnb+jj] = -20.5; } for(; jj<2*nu; jj++) { hd2[ii][jj] = - 2.5; hd2[ii][pnb+jj] = -10.0; } for(; jj<2*nu+(N-ii)*nx0; jj++) { hd2[ii][jj] = -100.0; hd2[ii][pnb+jj] = -100.0; } hd2[ii][jj+0] = 0.0; hd2[ii][pnb+jj+0] = -20.0; hd2[ii][jj+1] = -10.0; hd2[ii][pnb+jj+1] = -10.0; jj += 2; for(; jj<nu+nx; jj++) { hd2[ii][jj] = -100.0; hd2[ii][pnb+jj] = -100.0; } //d_print_mat(1, nb, hd2[ii], 1); //d_print_mat(1, nb, hd2[ii]+pnb, 1); } //exit(1); printf("\nIPM full solve ...\n\n"); d_ip2_hard_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, nx, nu, N, nb, ng, ngN, hpBAbt2, hpRSQ, dummy, hd2, hux2, 1, hpi2, hlam2, ht2, work_ipm_full); printf("\nIPM full solve done\n\n"); #if 1 printf("\nux IPM full\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, hux2[ii], 1); #endif printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_hard_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, nx, nu, N, nb, ng, ngN, hpBAbt2, hpRSQ, dummy, hd2, hux2, 1, hpi2, hlam2, ht2, work_ipm_full); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_full = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); // free memory free(work_ipm_full); for(ii=0; ii<N; ii++) { free(hd2[ii]); free(hlam2[ii]); free(ht2[ii]); } free(hd2[N]); free(hlam2[N]); free(ht2[N]); // free memory free(work); free(RSQ); free(BAb_temp); for(ii=0; ii<N; ii++) { free(hpBAbt2[ii]); free(hpRSQ[ii]); free(hpL[ii]); free(hux2[ii]); free(hpi2[ii]); free(hq2[ii]); free(hPb2[ii]); free(hres_rq2[ii]); free(hres_b2[ii]); } free(hpRSQ[N]); free(hpL[N]); free(hux2[N]); free(hpi2[N]); free(hq2[N]); free(hres_rq2[N]); #endif printf("\nric diag time = %e\t\tric full time = %e\t\tric full tv time = %e\t\tip diag time = %e\t\tip full time = %e\t\tip full tv time = %e\n\n", time_ric_diag, time_ric_full, time_ric_full_tv, time_ip_diag, time_ip_full, time_ip_full_tv); #endif }
/* primal-dual interior-point method, hard constraints, time variant matrices (mpc version) */ int d_ip2_hard_mpc(int *kk, int k_max, double mu0, double mu_tol, double alpha_min, int warm_start, double *sigma_par, double *stat, int nx, int nu, int N, int nb, int ng, int ngN, double **pBAbt, double **pQ, double **pDCt, double **d, double **ux, int compute_mult, double **pi, double **lam, double **t, double *work_memory) { int nbu = nu<nb ? nu : nb ; // indeces int jj, ll, ii, bs0; // constants const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line const int nz = nx+nu+1; const int nxu = nx+nu; const int pnz = bs*((nz+bs-1)/bs); const int pnx = bs*((nx+bs-1)/bs); const int pnb = bs*((nb+bs-1)/bs); // simd aligned number of two-sided box constraints !!!!!!!!!!!!!!!!!! const int png = bs*((ng+bs-1)/bs); // simd aligned number of two-sided general constraints !!!!!!!!!!!!!!!!!! const int pngN = bs*((ngN+bs-1)/bs); // simd aligned number of two-sided general constraints at stage N !!!!!!!!!!!!!!!!!! const int cnz = ncl*((nz+ncl-1)/ncl); const int cnx = ncl*((nx+ncl-1)/ncl); // const int cng = ncl*((ng+ncl-1)/ncl); const int cngN = ncl*((ngN+ncl-1)/ncl); const int cnxg = ncl*((ng+nx+ncl-1)/ncl); const int anz = nal*((nz+nal-1)/nal); const int anx = nal*((nx+nal-1)/nal); // const int anb = nal*((2*nb+nal-1)/nal); // cache aligned number of box constraints //const int anb = nal*((nb+nal-1)/nal); // cache aligned number of two-sided box constraints !!!!!!!!!!!!!!!!!! // const int pad = (ncl-nx%ncl)%ncl; // packing between BAbtL & P //const int cnl = cnz<cnx+ncl ? nx+pad+cnx+ncl : nx+pad+cnz; const int cnl = cnz<cnx+ncl ? cnx+ncl : cnz; //printf("\n%d %d %d %d %d\n", N, nx, nu, nb, ng); //d_print_pmat(nz, nx, bs, pBAbt[0], cnx); //d_print_pmat(nz, nx, bs, pBAbt[1], cnx); //d_print_pmat(nz, nx, bs, pBAbt[N-1], cnx); //d_print_pmat(nz, nz, bs, pQ[0], cnz); //d_print_pmat(nz, nz, bs, pQ[1], cnz); //d_print_pmat(nz, nz, bs, pQ[N], cnz); //d_print_pmat(nx+nu, ng, bs, pDCt[0], cng); //d_print_pmat(nx+nu, ng, bs, pDCt[1], cng); //d_print_pmat(nx+nu, ng, bs, pDCt[N], cng); //d_print_mat(1, 2*pnb+2*png, d[0], 1); //d_print_mat(1, 2*pnb+2*png, d[1], 1); //d_print_mat(1, 2*pnb+2*png, d[N], 1); //d_print_mat(1, nx+nu, ux[0], 1); //d_print_mat(1, nx+nu, ux[1], 1); //d_print_mat(1, nx+nu, ux[N], 1); //exit(1); // initialize work space double *ptr; ptr = work_memory; double *(dux[N+1]); double *(dpi[N+1]); double *(pL[N+1]); double *(pd[N+1]); // pointer to diagonal of Hessian double *(pl[N+1]); // pointer to linear part of Hessian double *(bd[N+1]); // backup diagonal of Hessian double *(bl[N+1]); // backup linear part of Hessian double *work; double *diag; double *(dlam[N+1]); double *(dt[N+1]); double *(lamt[N+1]); double *(t_inv[N+1]); double *(Qx[N+1]); double *(qx[N+1]); double *(Pb[N]); // ptr += (N+1)*(pnx + pnz*cnl + 12*pnz) + 3*pnz; // inputs and states for(jj=0; jj<=N; jj++) { dux[jj] = ptr; ptr += anz; } // equality constr multipliers for(jj=0; jj<=N; jj++) { dpi[jj] = ptr; ptr += anx; } // Hessian for(jj=0; jj<=N; jj++) { pd[jj] = ptr; //pQ[jj]; pl[jj] = ptr + anz; //pQ[jj] + ((nu+nx)/bs)*bs*cnz + (nu+nx)%bs; bd[jj] = ptr + 2*anz; bl[jj] = ptr + 3*anz; ptr += 4*anz; // backup for(ll=0; ll<nx+nu; ll++) { bd[jj][ll] = pQ[jj][(ll/bs)*bs*cnz+ll%bs+ll*bs]; bl[jj][ll] = pQ[jj][((nx+nu)/bs)*bs*cnz+(nx+nu)%bs+ll*bs]; } } // work space for(jj=0; jj<=N; jj++) { pL[jj] = ptr; ptr += pnz*cnl; } work = ptr; //ptr += 2*anz; if(cngN<=cnxg) ptr += pnz*cnxg; else ptr += pnz*cngN; diag = ptr; ptr += anz; // slack variables, Lagrangian multipliers for inequality constraints and work space (assume # box constraints <= 2*(nx+nu) < 2*pnz) for(jj=0; jj<N; jj++) { dlam[jj] = ptr; dt[jj] = ptr + 2*pnb+2*png; ptr += 4*pnb+4*png; } dlam[N] = ptr; dt[N] = ptr + 2*pnb+2*pngN; ptr += 4*pnb+4*pngN; for(jj=0; jj<N; jj++) { lamt[jj] = ptr; ptr += 2*pnb+2*png; } lamt[N] = ptr; ptr += 2*pnb+2*pngN; for(jj=0; jj<N; jj++) { t_inv[jj] = ptr; ptr += 2*pnb+2*png; } t_inv[N] = ptr; ptr += 2*pnb+2*pngN; for(jj=0; jj<N; jj++) { Qx[jj] = ptr; qx[jj] = ptr+png; ptr += 2*pnb+2*png; } Qx[N] = ptr; qx[N] = ptr+pngN; ptr += 2*pnb+2*pngN; // backup of P*b for(jj=0; jj<N; jj++) { Pb[jj] = ptr; ptr += anx; } double temp0, temp1; double alpha, mu, mu_aff; double mu_scal = N*2*(nb+ng)+2*ngN; //printf("\nmu_scal = %f\n", mu_scal); mu_scal = 1.0/mu_scal; //printf("\nmu_scal = %f\n", mu_scal); double sigma, sigma_decay, sigma_min; //printf("\n%d %d %d\n", ng, ngN, N*2*ng+2*ngN); //exit(1); sigma = sigma_par[0]; //0.4; sigma_decay = sigma_par[1]; //0.3; sigma_min = sigma_par[2]; //0.01; // initialize ux & t>0 (slack variable) d_init_var_hard_mpc(N, nx, nu, nb, ng, ngN, ux, pi, pDCt, d, t, lam, mu0, warm_start); #if 0 d_print_mat(1, 2*pnb+2*png, t[0], 1); d_print_mat(1, 2*pnb+2*png, t[1], 1); d_print_mat(1, 2*pnb+2*pngN, t[N], 1); d_print_mat(1, 2*pnb+2*png, lam[0], 1); d_print_mat(1, 2*pnb+2*png, lam[1], 1); d_print_mat(1, 2*pnb+2*pngN, lam[N], 1); exit(1); #endif // initialize pi for(jj=0; jj<=N; jj++) for(ll=0; ll<nx; ll++) dpi[jj][ll] = 0.0; // initialize dux for(ll=0; ll<nx; ll++) dux[0][nu+ll] = ux[0][nu+ll]; // compute the duality gap //alpha = 0.0; // needed to compute mu !!!!! //d_compute_mu_hard_mpc(N, nx, nu, nb, &mu, mu_scal, alpha, lam, dlam, t, dt); mu = mu0; // set to zero iteration count *kk = 0; // larger than minimum accepted step size alpha = 1.0; // update hessian in Riccati routine const int update_hessian = 1; int fast_rsqrt = 0; // IP loop while( *kk<k_max && mu>mu_tol && alpha>=alpha_min ) { //update cost function matrices and vectors (box constraints) d_update_hessian_hard_mpc(N, nx, nu, nb, ng, ngN, cnz, 0.0, t, t_inv, lam, lamt, dlam, Qx, qx, bd, bl, pd, pl, d); #if 0 d_print_mat(1, 2*pnb+2*png, pd[0], 1); d_print_mat(1, 2*pnb+2*png, pd[1], 1); d_print_mat(1, 2*pnb+2*png, pd[N], 1); d_print_mat(1, 2*pnb+2*png, pl[0], 1); d_print_mat(1, 2*pnb+2*png, pl[1], 1); d_print_mat(1, 2*pnb+2*png, pl[N], 1); #if 0 d_print_mat(1, 2*pnb+2*png, Qx[0], 1); d_print_mat(1, 2*pnb+2*png, Qx[1], 1); d_print_mat(1, 2*pnb+2*pngN, Qx[N], 1); d_print_mat(1, 2*pnb+2*png, qx[0], 1); d_print_mat(1, 2*pnb+2*png, qx[1], 1); d_print_mat(1, 2*pnb+2*pngN, qx[N], 1); #endif exit(1); #endif #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nu+nx, pd[ii], 1); for(ii=0; ii<=N; ii++) d_print_mat(1, nu+nx, pl[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, ng, Qx[ii], 1); d_print_mat(1, ngN, Qx[N], 1); for(ii=0; ii<N; ii++) d_print_mat(1, ng, qx[ii], 1); d_print_mat(1, ngN, qx[N], 1); if(*kk==1) exit(1); #endif // compute the search direction: factorize and solve the KKT system #if defined(FAST_RSQRT) if(mu>1e-2) fast_rsqrt = 2; else { if(mu>1e-4) fast_rsqrt = 1; else fast_rsqrt = 0; } #else fast_rsqrt = 0; #endif //printf("\n%d %f\n", fast_rsqrt, mu); d_back_ric_sv(N, nx, nu, pBAbt, pQ, update_hessian, pd, pl, 1, dux, pL, work, diag, 1, Pb, compute_mult, dpi, nb, ng, ngN, pDCt, Qx, qx); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nz, nz, bs, pL[ii], cnl); exit(1); #endif #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, dux[ii], 1); if(*kk==1) exit(1); #endif #if 1 // compute t_aff & dlam_aff & dt_aff & alpha for(jj=0; jj<=N; jj++) for(ll=0; ll<2*nb; ll++) dlam[jj][ll] = 0.0; alpha = 1.0; d_compute_alpha_hard_mpc(N, nx, nu, nb, ng, ngN, &alpha, t, dt, lam, dlam, lamt, dux, pDCt, d); stat[5*(*kk)] = sigma; stat[5*(*kk)+1] = alpha; alpha *= 0.995; // compute the affine duality gap d_compute_mu_hard_mpc(N, nx, nu, nb, ng, ngN, &mu_aff, mu_scal, alpha, lam, dlam, t, dt); stat[5*(*kk)+2] = mu_aff; //mu_aff = 1.346982; // TODO remove !!!!!!!!!!!!!!!!!!!!!!!!!!!!!! // compute sigma sigma = mu_aff/mu; sigma = sigma*sigma*sigma; // if(sigma<sigma_min) // sigma = sigma_min; d_update_gradient_hard_mpc(N, nx, nu, nb, ng, ngN, sigma*mu, dt, dlam, t_inv, pl, qx); #if 0 for(ii=0; ii<=N; ii++) d_print_mat(1, nu+nx, pl[ii], 1); //for(ii=0; ii<N; ii++) // d_print_mat(1, ng, qx[ii], 1); //d_print_mat(1, ngN, qx[N], 1); if(*kk==1) exit(1); #endif #if 0 // first stage for(ii=0; ii<2*nbu; ii+=2) { dlam[0][ii+0] = t_inv[0][ii+0]*(sigma*mu - dlam[0][ii+0]*dt[0][ii+0]); // !!!!! dlam[0][ii+1] = t_inv[0][ii+1]*(sigma*mu - dlam[0][ii+1]*dt[0][ii+1]); // !!!!! pl[0][ii/2] += dlam[0][ii+1] - dlam[0][ii+0]; } // middle stages for(jj=1; jj<N; jj++) { for(ii=0; ii<2*nb; ii+=2) { dlam[jj][ii+0] = t_inv[jj][ii+0]*(sigma*mu - dlam[jj][ii+0]*dt[jj][ii+0]); // !!!!! dlam[jj][ii+1] = t_inv[jj][ii+1]*(sigma*mu - dlam[jj][ii+1]*dt[jj][ii+1]); // !!!!! pl[jj][ii/2] += dlam[jj][ii+1] - dlam[jj][ii+0]; } } // last stages for(ii=2*nu; ii<2*nb; ii+=2) { dlam[jj][ii+0] = t_inv[jj][ii+0]*(sigma*mu - dlam[jj][ii+0]*dt[jj][ii+0]); // !!!!! dlam[jj][ii+1] = t_inv[jj][ii+1]*(sigma*mu - dlam[jj][ii+1]*dt[jj][ii+1]); // !!!!! pl[jj][ii/2] += dlam[jj][ii+1] - dlam[jj][ii+0]; } #endif // copy b into x for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) dux[ii+1][nu+jj] = pBAbt[ii][((nu+nx)/bs)*bs*cnx+(nu+nx)%bs+bs*jj]; // copy b // solve the system d_ric_trs_mpc(nx, nu, N, pBAbt, pL, pl, dux, work, 0, Pb, compute_mult, dpi, nb, ng, ngN, pDCt, qx); #if 0 printf("\ndux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, dux[ii], 1); if(*kk==1) exit(1); #endif #endif // compute t & dlam & dt & alpha alpha = 1.0; d_compute_alpha_hard_mpc(N, nx, nu, nb, ng, ngN, &alpha, t, dt, lam, dlam, lamt, dux, pDCt, d); stat[5*(*kk)] = sigma; stat[5*(*kk)+3] = alpha; alpha *= 0.995; // update x, u, lam, t & compute the duality gap mu d_update_var_hard_mpc(N, nx, nu, nb, ng, ngN, &mu, mu_scal, alpha, ux, dux, t, dt, lam, dlam, pi, dpi); stat[5*(*kk)+4] = mu; // update sigma /* sigma *= sigma_decay;*/ /* if(sigma<sigma_min)*/ /* sigma = sigma_min;*/ /* if(alpha<0.3)*/ /* sigma = sigma_par[0];*/ #if 0 d_print_mat(1, 2*pnb+2*png, lam[0], 1); d_print_mat(1, 2*pnb+2*png, lam[1], 1); d_print_mat(1, 2*pnb+2*png, lam[N], 1); d_print_mat(1, 2*pnb+2*png, t[0], 1); d_print_mat(1, 2*pnb+2*png, t[1], 1); d_print_mat(1, 2*pnb+2*png, t[N], 1); printf("\n%f\n", mu); exit(1); #endif //mu = 13.438997; // increment loop index (*kk)++; } // end of IP loop // restore Hessian for(jj=0; jj<=N; jj++) { for(ll=0; ll<nx+nu; ll++) { pQ[jj][(ll/bs)*bs*cnz+ll%bs+ll*bs] = bd[jj][ll]; pQ[jj][((nx+nu)/bs)*bs*cnz+(nx+nu)%bs+ll*bs] = bl[jj][ll]; } } // successful exit if(mu<=mu_tol) return 0; // max number of iterations reached if(*kk>=k_max) return 1; // no improvement if(alpha<alpha_min) return 2; // impossible return -1; } // end of ipsolver
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif int ii, jj; int rep, nrep=1000;//NREP; int nx = NX; // number of states (it has to be even for the mass-spring system test problem) int nu = NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = NN; // horizon lenght int nb = nu+nx; // number of box constrained inputs and states int ng = nx; //4; // number of general constraints int ngN = nx; // number of general constraints at the last stage # define USE_IPM_RES 1 // int M = 32; // where the equality constraint hold int nbu = nu<nb ? nu : nb ; int nbx = nb-nu>0 ? nb-nu : 0; #define KEEP_X0 0 // stage-wise variant size int nx_v[N+1]; #if KEEP_X0 nx_v[0] = nx; #else nx_v[0] = 0; #endif for(ii=1; ii<=N; ii++) nx_v[ii] = nx; int nu_v[N+1]; for(ii=0; ii<N; ii++) nu_v[ii] = nu; nu_v[N] = 0; int nb_v[N+1]; #if KEEP_X0 nb_v[0] = nb; #else nb_v[0] = nbu; #endif for(ii=1; ii<N; ii++) nb_v[ii] = nb; nb_v[N] = nbx; int ng_v[N+1]; for(ii=0; ii<N; ii++) ng_v[ii] = ng; ng_v[N] = ngN; // ng_v[M] = nx; // XXX printf(" Test problem: mass-spring system with %d masses and %d controls.\n", nx/2, nu); printf("\n"); printf(" MPC problem size: %d states, %d inputs, %d horizon length, %d two-sided box constraints, %d two-sided general constraints.\n", nx, nu, N, nb, ng); printf("\n"); #if IP == 1 printf(" IP method parameters: primal-dual IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_param.c to change them).\n", K_MAX, MU_TOL); #elif IP == 2 printf(" IP method parameters: predictor-corrector IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_param.c to change them).\n", K_MAX, MU_TOL); #else printf(" Wrong value for IP solver choice: %d\n", IP); #endif int info = 0; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; int pnz = (nu+nx+1+bs-1)/bs*bs; int pnu = (nu+bs-1)/bs*bs; int pnu1 = (nu+1+bs-1)/bs*bs; int pnx = (nx+bs-1)/bs*bs; int pnx1 = (nx+1+bs-1)/bs*bs; int pnux = (nu+nx+bs-1)/bs*bs; int cnx = (nx+ncl-1)/ncl*ncl; int cnu = (nu+ncl-1)/ncl*ncl; int cnux = (nu+nx+ncl-1)/ncl*ncl; int pnb_v[N+1]; int png_v[N+1]; int pnx_v[N+1]; int pnz_v[N+1]; int pnux_v[N+1]; int cnx_v[N+1]; int cnux_v[N+1]; int cng_v[N+1]; for(ii=0; ii<N; ii++) { pnb_v[ii] = (nb_v[ii]+bs-1)/bs*bs; png_v[ii] = (ng_v[ii]+bs-1)/bs*bs; pnx_v[ii] = (nx_v[ii]+bs-1)/bs*bs; pnz_v[ii] = (nu_v[ii]+nx_v[ii]+1+bs-1)/bs*bs; pnux_v[ii] = (nu_v[ii]+nx_v[ii]+bs-1)/bs*bs; cnx_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cnux_v[ii] = (nu_v[ii]+nx_v[ii]+ncl-1)/ncl*ncl; cng_v[ii] = (ng_v[ii]+ncl-1)/ncl*ncl; } ii = N; pnb_v[ii] = (nb_v[ii]+bs-1)/bs*bs; png_v[ii] = (ng_v[ii]+bs-1)/bs*bs; pnx_v[ii] = (nx_v[ii]+bs-1)/bs*bs; pnz_v[ii] = (nx_v[ii]+1+bs-1)/bs*bs; pnux_v[ii] = (nx_v[ii]+bs-1)/bs*bs; cnx_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cnux_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cng_v[ii] = (ng_v[ii]+ncl-1)/ncl*ncl; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros_align(&b, nx, 1); // states offset double *x0; d_zeros_align(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.1; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 2.5; x0[1] = 2.5; double *pA; d_zeros_align(&pA, pnx, cnx); d_cvt_mat2pmat(nx, nx, A, nx, 0, pA, cnx); double *b0; d_zeros_align(&b0, pnx, 1); for(ii=0; ii<nx; ii++) b0[ii] = b[ii]; #if ! KEEP_X0 dgemv_n_lib(nx, nx, pA, cnx, x0, 1, b0, b0); #endif double *pBAbt0; d_zeros_align(&pBAbt0, pnz_v[0], cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], nu_v[0], B, nx_v[1], 0, pBAbt0, cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], nx_v[0], A, nx_v[1], nu_v[0], pBAbt0+nu_v[0]/bs*bs*cnx_v[1]+nu_v[0]%bs, cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], 1, b0, nx_v[1], nu_v[0]+nx_v[0], pBAbt0+(nu_v[0]+nx_v[0])/bs*bs*cnx_v[1]+(nu_v[0]+nx_v[0])%bs, cnx_v[1]); double *pBAbt1; if(N>1) { d_zeros_align(&pBAbt1, pnz_v[1], cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], nu_v[1], B, nx_v[2], 0, pBAbt1, cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], nx_v[1], A, nx_v[2], nu_v[1], pBAbt1+nu_v[1]/bs*bs*cnx_v[2]+nu_v[1]%bs, cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], 1, b, nx_v[2], nu_v[1]+nx_v[1], pBAbt1+(nu_v[1]+nx_v[1])/bs*bs*cnx_v[2]+(nu_v[1]+nx_v[1])%bs, cnx_v[2]); } #if 0 d_print_pmat(nu_v[0]+nx_v[0]+1, nx_v[1], bs, pBAbt0, cnx_v[1]); d_print_pmat(nu_v[1]+nx_v[1]+1, nx_v[2], bs, pBAbt1, cnx_v[2]); exit(2); #endif /************************************************ * box & general constraints ************************************************/ int *idx0; i_zeros(&idx0, nb_v[0], 1); double *d0; d_zeros_align(&d0, 2*pnb_v[0]+2*png_v[0], 1); #if KEEP_X0 for(jj=0; jj<nbu; jj++) { d0[jj] = - 0.5; // umin d0[pnb_v[0]+jj] = 0.5; // umax idx0[jj] = jj; } for(; jj<nb; jj++) { d0[jj] = x0[jj-nu]; // xmin d0[pnb_v[0]+jj] = x0[jj-nu]; // xmax idx0[jj] = jj; } #else for(jj=0; jj<nbu; jj++) { d0[jj] = - 0.5; // umin d0[pnb_v[0]+jj] = 0.5; // umax idx0[jj] = jj; } #endif for(jj=0; jj<ng_v[0]; jj++) { d0[2*pnb_v[0]+jj] = - 100.0; // xmin d0[2*pnb_v[0]+png_v[0]+jj] = 100.0; // xmax } #if 0 i_print_mat(1, nb_v[0], idx0, 1); d_print_mat(1, 2*pnb_v[0]+2*png_v[0], d0, 1); exit(2); #endif int *idx1; i_zeros(&idx1, nb_v[1], 1); double *d1; d_zeros_align(&d1, 2*pnb_v[1]+2*png_v[1], 1); for(jj=0; jj<nbu; jj++) { d1[jj] = - 0.5; // umin d1[pnb_v[1]+jj] = 0.5; // umax idx1[jj] = jj; } for(; jj<nb; jj++) { d1[jj] = - 10.0; // xmin d1[pnb_v[1]+jj] = 10.0; // xmax idx1[jj] = jj; } for(jj=0; jj<ng_v[1]; jj++) { d1[2*pnb_v[1]+jj] = - 100.0; // xmin d1[2*pnb_v[1]+png_v[1]+jj] = 100.0; // xmax } // i_print_mat(nb, 1, idx1, nb); int *idxN; i_zeros(&idxN, nb_v[N], 1); double *dN; d_zeros_align(&dN, 2*pnb_v[N]+2*png_v[N], 1); for(jj=0; jj<nbx; jj++) { dN[jj] = - 10.0; // xmin dN[pnb_v[N]+jj] = 10.0; // xmax idxN[jj] = jj; } for(jj=0; jj<ng_v[N]; jj++) { dN[2*pnb_v[N]+jj] = - 0.0; // xmin dN[2*pnb_v[N]+png_v[N]+jj] = 0.0; // xmax } // d_print_mat(1, 2*pnb+2*png, d, 1); // d_print_mat(1, 2*pnb_v[N]+2*png_v[N], dN, 1); // exit(1); // double *dM; d_zeros_align(&dM, 2*pnb_v[M]+2*png_v[M], 1); // for(jj=0; jj<nbu; jj++) // { // dM[jj] = - 0.5; // umin // dM[pnb_v[1]+jj] = 0.5; // umax // } // for(; jj<nb; jj++) // { // dM[jj] = - 4.0; // xmin // dM[pnb_v[1]+jj] = 4.0; // xmax // } // for(jj=0; jj<ng_v[M]; jj++) // { // dM[2*pnb_v[M]+jj] = - 0.5; // xmin // dM[2*pnb_v[M]+png_v[M]+jj] = - 0.5; // xmax // } double *C; d_zeros(&C, ng, nx); for(ii=0; ii<ng; ii++) C[ii*(ng+1)] = 1.0; double *D; d_zeros(&D, ng, nu); // first stage double *pDCt0; d_zeros_align(&pDCt0, pnux_v[0], cng_v[0]); // middle stage double *DC1; d_zeros(&DC1, ng_v[1], nu_v[1]+nx_v[1]); for(jj=0; jj<ng_v[1]; jj++) DC1[jj+(nu_v[1]+jj)*ng_v[1]] = 1.0; // d_print_mat(ng_v[1], nu_v[1]+nx_v[1], DC1, ng_v[1]); double *pDCt1; d_zeros_align(&pDCt1, pnux_v[1], cng_v[1]); d_cvt_tran_mat2pmat(ng_v[1], nu_v[1]+nx_v[1], DC1, ng_v[1], 0, pDCt1, cng_v[1]); // d_print_pmat(nu_v[1]+nx_v[1], ng_v[1], bs, pDCt1, cng_v[1]); // exit(2); // last stage double *DCN; d_zeros(&DCN, ng_v[N], nx_v[N]); for(jj=0; jj<ng_v[N]; jj++) DCN[jj*(ng_v[N]+1)] = 1.0; // d_print_mat(ng_v[N], nx_v[N], DCN, ng_v[N]); double *pDCtN; d_zeros_align(&pDCtN, pnx_v[N], cng_v[N]); d_cvt_tran_mat2pmat(ng_v[N], nx_v[N], DCN, ng_v[N], 0, pDCtN, cng_v[N]); // d_print_pmat(nx_v[N], ng_v[N], bs, pDCtN, cng_v[N]); // constrained stage // double *DCM; d_zeros(&DCM, ng_v[M], nu_v[M]+nx_v[M]); // for(jj=0; jj<ng_v[M]; jj++) DCM[jj+(jj+nu_v[M])*ng_v[M]] = 1.0; // d_print_mat(ng_v[M], nu_v[M]+nx_v[M], DCM, ng_v[M]); // double *pDCtM; d_zeros_align(&pDCtM, pnux_v[M], cng_v[M]); // d_cvt_tran_mat2pmat(ng_v[M], nu_v[M]+nx_v[M], DCM, ng_v[M], 0, pDCtM, cng_v[M]); // d_print_pmat(nu_v[M]+nx_v[M], ng_v[M], bs, pDCtM, cng_v[M]); // exit(2); /************************************************ * cost function ************************************************/ double *Q; d_zeros(&Q, nx, nx); for(ii=0; ii<nx; ii++) Q[ii*(nx+1)] = 1.0; double *R; d_zeros(&R, nu, nu); for(ii=0; ii<nu; ii++) R[ii*(nu+1)] = 2.0; double *S; d_zeros(&S, nu, nx); // S=0, so no need to update r0 double *q; d_zeros(&q, nx, 1); for(ii=0; ii<nx; ii++) q[ii] = 0.1; double *r; d_zeros(&r, nu, 1); for(ii=0; ii<nu; ii++) r[ii] = 0.2; #if KEEP_X0 double *pRSQ0; d_zeros_align(&pRSQ0, pnz, cnux); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ0, cnux); d_cvt_tran_mat2pmat(nu, nx, S, nu, nu, pRSQ0+nu/bs*bs*cnux+nu%bs, cnux); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu+nx, pRSQ0+(nu+nx)/bs*bs*cnux+(nu+nx)%bs, cnux); d_cvt_mat2pmat(nx, nx, Q, nx, nu, pRSQ0+nu/bs*bs*cnux+nu%bs+nu*bs, cnux); d_cvt_tran_mat2pmat(nx, 1, q, nx, nu+nx, pRSQ0+(nu+nx)/bs*bs*cnux+(nu+nx)%bs+nu*bs, cnux); // d_print_pmat(nu+nx+1, nu+nx, bs, pRSQ0, cnux); double *rq0; d_zeros_align(&rq0, pnux, 1); d_copy_mat(nu, 1, r, nu, rq0, pnux); d_copy_mat(nx, 1, q, nx, rq0+nu, pnux); #else double *pRSQ0; d_zeros_align(&pRSQ0, pnu1, cnu); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ0, cnu); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu, pRSQ0+nu/bs*bs*cnu+nu%bs, cnu); // d_print_pmat(nu+1, nu, bs, pRSQ0, cnu); double *rq0; d_zeros_align(&rq0, pnu, 1); d_copy_mat(nu, 1, r, nu, rq0, pnu); #endif double *pRSQ1; d_zeros_align(&pRSQ1, pnz, cnux); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ1, cnux); d_cvt_tran_mat2pmat(nu, nx, S, nu, nu, pRSQ1+nu/bs*bs*cnux+nu%bs, cnux); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu+nx, pRSQ1+(nu+nx)/bs*bs*cnux+(nu+nx)%bs, cnux); d_cvt_mat2pmat(nx, nx, Q, nx, nu, pRSQ1+nu/bs*bs*cnux+nu%bs+nu*bs, cnux); d_cvt_tran_mat2pmat(nx, 1, q, nx, nu+nx, pRSQ1+(nu+nx)/bs*bs*cnux+(nu+nx)%bs+nu*bs, cnux); // d_print_pmat(nu+nx+1, nu+nx, bs, pRSQ1, cnux); double *rq1; d_zeros_align(&rq1, pnux, 1); d_copy_mat(nu, 1, r, nu, rq1, pnux); d_copy_mat(nx, 1, q, nx, rq1+nu, pnux); double *pRSQN; d_zeros_align(&pRSQN, pnx1, cnx); d_cvt_mat2pmat(nx, nx, Q, nx, 0, pRSQN, cnx); d_cvt_tran_mat2pmat(nx, 1, q, nx, nx, pRSQN+(nx)/bs*bs*cnx+(nx)%bs, cnx); // d_print_pmat(nx+1, nx, bs, pRSQN, cnx); double *rqN; d_zeros_align(&rqN, pnx, 1); d_copy_mat(nx, 1, q, nx, rqN, pnx); // maximum element in cost functions double mu0 = 2.0; /************************************************ * high level interface work space ************************************************/ #if 0 double *rA; d_zeros(&rA, nx, N*nx); d_rep_mat(N, nx, nx, A, nx, rA, nx); double *rB; d_zeros(&rB, nx, N*nu); d_rep_mat(N, nx, nu, B, nx, rB, nx); double *rC; d_zeros(&rC, ng, (N+1)*nx); d_rep_mat(N, ng, nx, C, ng, rC+nx*ng, ng); double *CN = DCN; double *rD; d_zeros(&rD, ng, N*nu); d_rep_mat(N, ng, nu, D, ng, rD, ng); double *rb; d_zeros(&rb, nx, N*1); d_rep_mat(N, nx, 1, b, nx, rb, nx); double *rQ; d_zeros(&rQ, nx, N*nx); d_rep_mat(N, nx, nx, Q, nx, rQ, nx); double *rQf; d_zeros(&rQf, nx, nx); d_copy_mat(nx, nx, Q, nx, rQf, nx); double *rS; d_zeros(&rS, nu, N*nx); d_rep_mat(N, nu, nx, S, nu, rS, nu); double *rR; d_zeros(&rR, nu, N*nu); d_rep_mat(N, nu, nu, R, nu, rR, nu); double *rq; d_zeros(&rq, nx, N); d_rep_mat(N, nx, 1, q, nx, rq, nx); double *rqf; d_zeros(&rqf, nx, 1); d_copy_mat(nx, 1, q, nx, rqf, nx); double *rr; d_zeros(&rr, nu, N); d_rep_mat(N, nu, 1, r, nu, rr, nu); double *lb; d_zeros(&lb, nb, 1); for(ii=0; ii<nb; ii++) lb[ii] = d1[ii]; double *rlb; d_zeros(&rlb, nb, N+1); d_rep_mat(N+1, nb, 1, lb, nb, rlb, nb); // d_print_mat(nb, N+1, rlb, nb); double *lg; d_zeros(&lg, ng, 1); for(ii=0; ii<ng; ii++) lg[ii] = d1[2*pnb_v[1]+ii]; double *rlg; d_zeros(&rlg, ng, N); d_rep_mat(N, ng, 1, lg, ng, rlg, ng); // d_print_mat(ng, N, rlg, ng); double *lgN; d_zeros(&lgN, ngN, 1); for(ii=0; ii<ngN; ii++) lgN[ii] = dN[2*pnb_v[N]+ii]; // d_print_mat(ngN, 1, lgN, ngN); double *ub; d_zeros(&ub, nb, 1); for(ii=0; ii<nb; ii++) ub[ii] = d1[pnb_v[1]+ii]; double *rub; d_zeros(&rub, nb, N+1); d_rep_mat(N+1, nb, 1, ub, nb, rub, nb); // d_print_mat(nb, N+1, rub, nb); double *ug; d_zeros(&ug, ng, 1); for(ii=0; ii<ng; ii++) ug[ii] = d1[2*pnb_v[1]+png_v[1]+ii]; double *rug; d_zeros(&rug, ng, N); d_rep_mat(N, ng, 1, ug, ng, rug, ng); // d_print_mat(ng, N, rug, ng); double *ugN; d_zeros(&ugN, ngN, 1); for(ii=0; ii<ngN; ii++) ugN[ii] = dN[2*pnb_v[N]+png_v[N]+ii]; // d_print_mat(ngN, 1, ugN, ngN); double *rx; d_zeros(&rx, nx, N+1); d_copy_mat(nx, 1, x0, nx, rx, nx); double *ru; d_zeros(&ru, nu, N); double *rpi; d_zeros(&rpi, nx, N); double *rlam; d_zeros(&rlam, N*2*(nb+ng)+2*(nb+ngN), 1); double *rt; d_zeros(&rt, N*2*(nb+ng)+2*(nb+ngN), 1); double *rwork = (double *) malloc(hpmpc_d_ip_mpc_hard_tv_work_space_size_bytes(N, nx, nu, nb, ng, ngN)); double inf_norm_res[4] = {}; // infinity norm of residuals: rq, rb, rd, mu #endif /************************************************ * low level interface work space ************************************************/ double *hpBAbt[N]; double *hpDCt[N+1]; double *hb[N]; double *hpRSQ[N+1]; double *hrq[N+1]; double *hd[N+1]; int *idx[N+1]; double *hux[N+1]; double *hpi[N]; double *hlam[N+1]; double *ht[N+1]; double *hrb[N]; double *hrrq[N+1]; double *hrd[N+1]; hpBAbt[0] = pBAbt0; hpDCt[0] = pDCt0; hb[0] = b0; hpRSQ[0] = pRSQ0; hrq[0] = rq0; hd[0] = d0; idx[0] = idx0; d_zeros_align(&hux[0], pnux_v[0], 1); d_zeros_align(&hpi[0], pnx_v[1], 1); d_zeros_align(&hlam[0], 2*pnb_v[0]+2*png_v[0], 1); d_zeros_align(&ht[0], 2*pnb_v[0]+2*png_v[0], 1); d_zeros_align(&hrb[0], pnx_v[1], 1); d_zeros_align(&hrrq[0], pnz_v[0], 1); d_zeros_align(&hrd[0], 2*pnb_v[0]+2*png_v[0], 1); for(ii=1; ii<N; ii++) { hpBAbt[ii] = pBAbt1; // d_zeros_align(&hpBAbt[ii], pnz_v[ii], cnx_v[ii+1]); for(jj=0; jj<pnz_v[ii]*cnx_v[ii+1]; jj++) hpBAbt[ii][jj] = pBAbt1[jj]; hpDCt[ii] = pDCt1; hb[ii] = b; hpRSQ[ii] = pRSQ1; // d_zeros_align(&hpRSQ[ii], pnz_v[ii], cnux_v[ii]); for(jj=0; jj<pnz_v[ii]*cnux_v[ii]; jj++) hpRSQ[ii][jj] = pRSQ1[jj]; hrq[ii] = rq1; hd[ii] = d1; idx[ii] = idx1; d_zeros_align(&hux[ii], pnux_v[ii], 1); d_zeros_align(&hpi[ii], pnx_v[ii+1], 1); d_zeros_align(&hlam[ii], 2*pnb_v[ii]+2*png_v[ii], 1); d_zeros_align(&ht[ii], 2*pnb_v[ii]+2*png_v[ii], 1); d_zeros_align(&hrb[ii], pnx_v[ii+1], 1); d_zeros_align(&hrrq[ii], pnz_v[ii], 1); d_zeros_align(&hrd[ii], 2*pnb_v[ii]+2*png_v[ii], 1); } hpDCt[N] = pDCtN; hpRSQ[N] = pRSQN; hrq[N] = rqN; hd[N] = dN; idx[N] = idxN; d_zeros_align(&hux[N], pnx, 1); d_zeros_align(&hlam[N], 2*pnb_v[N]+2*png_v[N], 1); d_zeros_align(&ht[N], 2*pnb_v[N]+2*png_v[N], 1); d_zeros_align(&hrrq[N], pnz_v[N], 1); d_zeros_align(&hrd[N], 2*pnb_v[N]+2*png_v[N], 1); // hpDCt[M] = pDCtM; // hd[M] = dM; double mu = 0.0; #if USE_IPM_RES double *work; d_zeros_align(&work, d_ip2_res_mpc_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)/sizeof(double), 1); #else double *work; d_zeros_align(&work, d_ip2_mpc_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)/sizeof(double), 1); #endif /************************************************ * (new) high level interface work space ************************************************/ // box constraints double *lb0; d_zeros(&lb0, nb_v[0], 1); for(ii=0; ii<nb_v[0]; ii++) lb0[ii] = d0[ii]; double *ub0; d_zeros(&ub0, nb_v[0], 1); for(ii=0; ii<nb_v[0]; ii++) ub0[ii] = d0[pnb_v[0]+ii]; double *lb1; d_zeros(&lb1, nb_v[1], 1); for(ii=0; ii<nb_v[1]; ii++) lb1[ii] = d1[ii]; double *ub1; d_zeros(&ub1, nb_v[1], 1); for(ii=0; ii<nb_v[1]; ii++) ub1[ii] = d1[pnb_v[1]+ii]; double *lbN; d_zeros(&lbN, nb_v[N], 1); for(ii=0; ii<nb_v[N]; ii++) lbN[ii] = dN[ii]; double *ubN; d_zeros(&ubN, nb_v[N], 1); for(ii=0; ii<nb_v[N]; ii++) ubN[ii] = dN[pnb_v[N]+ii]; // general constraints double *lg0; d_zeros(&lg0, ng_v[0], 1); for(ii=0; ii<ng_v[0]; ii++) lg0[ii] = d0[2*pnb_v[0]+ii]; double *ug0; d_zeros(&ug0, ng_v[0], 1); for(ii=0; ii<ng_v[0]; ii++) ug0[ii] = d0[2*pnb_v[0]+png_v[0]+ii]; double *lg1; d_zeros(&lg1, ng_v[1], 1); for(ii=0; ii<ng_v[1]; ii++) lg1[ii] = d1[2*pnb_v[1]+ii]; double *ug1; d_zeros(&ug1, ng_v[1], 1); for(ii=0; ii<ng_v[1]; ii++) ug1[ii] = d1[2*pnb_v[1]+png_v[1]+ii]; double *lgN; d_zeros(&lgN, ng_v[N], 1); for(ii=0; ii<ng_v[N]; ii++) lgN[ii] = dN[2*pnb_v[N]+ii]; double *ugN; d_zeros(&ugN, ng_v[N], 1); for(ii=0; ii<ng_v[N]; ii++) ugN[ii] = dN[2*pnb_v[N]+png_v[N]+ii]; // data matrices double *hA[N]; double *hB[N]; double *hC[N+1]; double *hD[N]; double *hQ[N+1]; double *hS[N]; double *hR[N]; double *hq[N+1]; double *hr[N]; double *hlb[N+1]; double *hub[N+1]; double *hlg[N+1]; double *hug[N+1]; double *hx[N+1]; double *hu[N]; double *hpi1[N]; double *hlam1[N+1]; double *ht1[N+1]; double inf_norm_res[4] = {}; // infinity norm of residuals: rq, rb, rd, mu ii = 0; hA[0] = A; hB[0] = B; hC[0] = C; hD[0] = D; hQ[0] = Q; hS[0] = S; hR[0] = R; hq[0] = q; hr[0] = r; hlb[0] = lb0; hub[0] = ub0; hlg[0] = lg0; hug[0] = ug0; d_zeros(&hx[0], nx_v[0], 1); d_zeros(&hu[0], nu_v[0], 1); d_zeros(&hpi1[0], nx_v[1], 1); d_zeros(&hlam1[0], 2*nb_v[0]+2*ng_v[0], 1); d_zeros(&ht1[0], 2*nb_v[0]+2*ng_v[0], 1); for(ii=1; ii<N; ii++) { hA[ii] = A; hB[ii] = B; hC[ii] = C; hD[ii] = D; hQ[ii] = Q; hS[ii] = S; hR[ii] = R; hq[ii] = q; hr[ii] = r; hlb[ii] = lb1; hub[ii] = ub1; hlg[ii] = lg1; hug[ii] = ug1; d_zeros(&hx[ii], nx_v[ii], 1); d_zeros(&hu[ii], nu_v[ii], 1); d_zeros(&hpi1[ii], nx_v[ii+1], 1); d_zeros(&hlam1[ii], 2*nb_v[ii]+2*ng_v[ii], 1); d_zeros(&ht1[ii], 2*nb_v[ii]+2*ng_v[ii], 1); } ii = N; hC[N] = C; hQ[N] = Q; hq[N] = q; hlb[N] = lbN; hub[N] = ubN; hlg[N] = lgN; hug[N] = ugN; d_zeros(&hx[N], nx_v[N], 1); d_zeros(&hlam1[N], 2*nb_v[N]+2*ng_v[N], 1); d_zeros(&ht1[N], 2*nb_v[N]+2*ng_v[N], 1); // work space #if 0 printf("work space in bytes: %d\n", hpmpc_d_ip_ocp_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)); exit(3); #endif void *work1 = malloc(hpmpc_d_ip_ocp_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)); double *ptr_work1 = (double *) work1; /************************************************ * solvers common stuff ************************************************/ int hpmpc_status; int kk, kk_avg; int k_max = 10; double mu_tol = 1e-20; double alpha_min = 1e-8; int warm_start = 0; // read initial guess from x and u double *stat; d_zeros(&stat, k_max, 5); int compute_res = 1; int compute_mult = 1; struct timeval tv0, tv1, tv2, tv3; double time; double **dummy; /************************************************ * call the solver (high-level interface) ************************************************/ #if 1 int time_invariant = 0; // assume the problem to be time invariant int free_x0 = 0; // assume x0 as optimization variable gettimeofday(&tv0, NULL); // stop kk_avg = 0; for(rep=0; rep<nrep; rep++) { // hpmpc_status = fortran_order_d_ip_mpc_hard_tv(&kk, k_max, mu0, mu_tol, N, nx, nu, nb, ng, ngN, time_invariant, free_x0, warm_start, rA, rB, rb, rQ, rQf, rS, rR, rq, rqf, rr, rlb, rub, rC, rD, rlg, rug, CN, lgN, ugN, rx, ru, rpi, rlam, rt, inf_norm_res, rwork, stat); hpmpc_status = fortran_order_d_ip_ocp_hard_tv(&kk, k_max, mu0, mu_tol, N, nx_v, nu_v, nb_v, ng_v, warm_start, hA, hB, hb, hQ, hS, hR, hq, hr, hlb, hub, hC, hD, hlg, hug, hx, hu, hpi1, hlam1, ht1, inf_norm_res, work1, stat); kk_avg += kk; } gettimeofday(&tv1, NULL); // stop printf("\nsolution from high-level interface\n\n"); // d_print_mat(nx, N+1, rx, nx); // d_print_mat(nu, N, ru, nu); for(ii=0; ii<=N; ii++) d_print_mat(1, nx_v[ii], hx[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nu_v[ii], hu[ii], 1); printf("\ninfinity norm of residuals\n\n"); d_print_mat_e(1, 4, inf_norm_res, 1); time = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); printf("\n"); printf(" Average number of iterations over %d runs: %5.1f\n", nrep, kk_avg / (double) nrep); printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time); printf("\n\n"); gettimeofday(&tv0, NULL); // stop kk_avg = 0; for(rep=0; rep<nrep; rep++) { // fortran_order_d_solve_kkt_new_rhs_mpc_hard_tv(N, nx, nu, nb, ng, ngN, time_invariant, free_x0, rA, rB, rb, rQ, rQf, rS, rR, rq, rqf, rr, rlb, rub, rC, rD, rlg, rug, CN, lgN, ugN, rx, ru, rpi, rlam, rt, inf_norm_res, rwork); fortran_order_d_solve_kkt_new_rhs_ocp_hard_tv(N, nx_v, nu_v, nb_v, ng_v, hA, hB, hb, hQ, hS, hR, hq, hr, hlb, hub, hC, hD, hlg, hug, hx, hu, hpi1, hlam1, ht1, inf_norm_res, work1); kk_avg += kk; } gettimeofday(&tv1, NULL); // stop printf("\nsolution from high-level interface (resolve final kkt)\n\n"); // d_print_mat(nx, N+1, rx, nx); // d_print_mat(nu, N, ru, nu); for(ii=0; ii<=N; ii++) d_print_mat(1, nx_v[ii], hx[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nu_v[ii], hu[ii], 1); printf("\ninfinity norm of residuals\n\n"); d_print_mat_e(1, 4, inf_norm_res, 1); time = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time); #endif /************************************************ * call the solver (low-level interface) ************************************************/ // for(ii=0; ii<N; ii++) // d_print_pmat(nu_v[ii]+nx_v[ii]+1, nx_v[ii+1], bs, hpBAbt[ii], cnx_v[ii+1]); // exit(3); gettimeofday(&tv0, NULL); // stop kk_avg = 0; printf("\nsolution...\n"); for(rep=0; rep<nrep; rep++) { #if USE_IPM_RES hpmpc_status = d_ip2_res_mpc_hard_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hpRSQ, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #else hpmpc_status = d_ip2_mpc_hard_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hpRSQ, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #endif kk_avg += kk; } printf("\ndone\n"); gettimeofday(&tv1, NULL); // stop printf("\nsolution from low-level interface (original problem)\n\n"); printf("\nux\n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hux[ii], 1); printf("\npi\n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx_v[ii+1], hpi[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], hlam[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], ht[ii], 1); // residuals if(compute_res) { // compute residuals d_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hux, hpDCt, hd, hpi, hlam, ht, hrrq, hrb, hrd, &mu); // print residuals printf("\nhrrq\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nu_v[ii]+nx_v[ii], hrrq[ii], 1); printf("\nhrb\n\n"); for(ii=0; ii<N; ii++) d_print_mat_e(1, nx_v[ii+1], hrb[ii], 1); printf("\nhrd low\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii], 1); printf("\nhrd up\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii]+pnb_v[ii], 1); } // zero the solution again for(ii=0; ii<=N; ii++) for(jj=0; jj<nu_v[ii]+nx_v[ii]; jj++) hux[ii][jj] = 0.0; // modify constraints #if 0 for(jj=0; jj<nbx; jj++) { dN[jj] = - 4.0; // xmin dN[pnb_v[N]+jj] = 4.0; // xmax idxN[jj] = jj; } for(jj=0; jj<ng_v[N]; jj++) { dN[2*pnb_v[N]+jj] = 0.1; // xmin dN[2*pnb_v[N]+png_v[N]+jj] = 0.1; // xmax } #endif #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nu_v[ii]+nx_v[ii]+1, nu_v[ii]+nx_v[ii], bs, hpRSQ[ii], cnux_v[ii]); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hrq[ii], 1); exit(1); #endif gettimeofday(&tv2, NULL); // stop printf("\nsolution...\n"); for(rep=0; rep<nrep; rep++) { #if USE_IPM_RES d_kkt_solve_new_rhs_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #else d_kkt_solve_new_rhs_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #endif } printf("\ndone\n"); gettimeofday(&tv3, NULL); // stop printf("\nsolution from low-level interface (resolve final kkt)\n\n"); printf("\nux\n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hux[ii], 1); printf("\npi\n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx_v[ii+1], hpi[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], hlam[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], ht[ii], 1); // residuals if(compute_res) { // compute residuals d_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hux, hpDCt, hd, hpi, hlam, ht, hrrq, hrb, hrd, &mu); // print residuals printf("\nhrrq\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nu_v[ii]+nx_v[ii], hrrq[ii], 1); printf("\nhrb\n\n"); for(ii=0; ii<N; ii++) d_print_mat_e(1, nx_v[ii+1], hrb[ii], 1); printf("\nhrd low\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii], 1); printf("\nhrd up\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii]+pnb_v[ii], 1); } double time_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); double time_final = (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); printf("\n"); printf(" Average number of iterations over %d runs: %5.1f\n", nrep, kk_avg / (double) nrep); printf(" Average solution time over %d runs: %5.2e seconds (IPM)\n", nrep, time_ipm); printf(" Average solution time over %d runs: %5.2e seconds (resolve final kkt)\n", nrep, time_final); printf("\n\n"); /************************************************ * compute residuals ************************************************/ /************************************************ * free memory ************************************************/ // problem data free(A); free(B); d_free_align(b); d_free_align(x0); free(C); free(D); free(Q); free(S); free(R); free(q); free(r); // low level interface d_free_align(pA); d_free_align(b0); d_free_align(pBAbt0); d_free_align(pBAbt1); d_free_align(d0); d_free_align(d1); d_free_align(dN); d_free_align(pDCt0); d_free_align(pDCt1); free(DCN); d_free_align(pDCtN); free(idx0); free(idx1); free(idxN); d_free_align(pRSQ0); d_free_align(pRSQ1); d_free_align(pRSQN); d_free_align(rq0); d_free_align(rq1); d_free_align(rqN); d_free_align(work); free(stat); for(ii=0; ii<N; ii++) { d_free_align(hux[ii]); d_free_align(hpi[ii]); d_free_align(hlam[ii]); d_free_align(ht[ii]); d_free_align(hrb[ii]); d_free_align(hrrq[ii]); d_free_align(hrd[ii]); } d_free_align(hux[N]); d_free_align(hlam[N]); d_free_align(ht[N]); d_free_align(hrrq[N]); d_free_align(hrd[N]); #if 0 // high level interface free(rA); free(rB); free(rC); free(rD); free(rb); free(rQ); free(rQf); free(rS); free(rR); free(rq); free(rqf); free(rr); free(lb); free(rlb); free(lg); free(rlg); free(lgN); free(ub); free(rub); free(ug); free(rug); free(ugN); free(rx); free(ru); free(rpi); free(rlam); free(rt); free(rwork); #endif // new high level interface free(lb0); free(ub0); free(lb1); free(ub1); free(lbN); free(ubN); free(lg0); free(ug0); free(lg1); free(ug1); free(work1); for(ii=0; ii<N; ii++) { free(hx[ii]); free(hu[ii]); free(hpi1[ii]); free(hlam1[ii]); free(ht1[ii]); } free(hx[N]); free(hlam1[N]); free(ht1[N]); return 0; }