int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); int ii, jj, ll; double **dummy; int ** int_dummy; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line int nx, nu, N, nrep; // timing variables float time_ric_diag, time_ric_full, time_ric_full_tv, time_ip_diag, time_ip_full, time_ip_full_tv; /************************************************ * test of riccati eye/diag & size-variant ************************************************/ #if 1 // horizon length N = 11; // base nx and nu int nx0 = 2; int nu0 = 1; // size-varing int nxx[N+1]; for(ii=0; ii<=N; ii++) nxx[ii] = (N+1-ii)*nx0 + nu0; int pnxx[N+1]; for(ii=0; ii<=N; ii++) pnxx[ii] = (nxx[ii]+bs-1)/bs*bs; int cnxx[N+1]; for(ii=0; ii<=N; ii++) cnxx[ii] = (nxx[ii]+ncl-1)/ncl*ncl; int nuu[N+1]; for(ii=0; ii<N; ii++) nuu[ii] = nu0; nuu[N] = 0; // !!!!! int pnuu[N+1]; for(ii=0; ii<N; ii++) pnuu[ii] = (nuu[ii]+bs-1)/bs*bs; pnuu[N] = 0; // !!!!! int cnuu[N+1]; for(ii=0; ii<N; ii++) cnuu[ii] = (nuu[ii]+ncl-1)/ncl*ncl; cnuu[N] = 0; // !!!!! //for(ii=0; ii<=N; ii++) printf("\n%d %d %d\n", nxx[ii], pnxx[ii], cnxx[ii]); //for(ii=0; ii<N; ii++) printf("\n%d %d %d\n", nuu[ii], pnuu[ii], cnuu[ii]); // factorization printf("\nRiccati diag\n\n"); // data memory space double *hdA[N]; double *hpBt[N]; double *hpR[N]; double *hpS[N]; double *hpQ[N+1]; double *hpLK[N]; double *hpP[N+1]; double *pK; for(ii=0; ii<N; ii++) { d_zeros_align(&hdA[ii], pnxx[ii], 1); d_zeros_align(&hpBt[ii], pnuu[ii], cnxx[ii+1]); d_zeros_align(&hpR[ii], pnuu[ii], cnuu[ii]); d_zeros_align(&hpS[ii], pnxx[ii], cnuu[ii]); d_zeros_align(&hpQ[ii], pnxx[ii], cnxx[ii]); d_zeros_align(&hpLK[ii], pnuu[ii]+pnxx[ii], cnuu[ii]); d_zeros_align(&hpP[ii], pnxx[ii], cnxx[ii]); } d_zeros_align(&hpQ[N], pnxx[N], cnxx[N]); d_zeros_align(&hpP[N], pnxx[N], cnxx[N]); d_zeros_align(&pK, pnxx[0], cnuu[0]); // max(nx) x nax(nu) // dA for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hdA[ii][jj] = 1.0; //d_print_mat(1, cnxx[1], hdA[0], 1); // B double *eye_nu0; d_zeros(&eye_nu0, nu0, nu0); for(jj=0; jj<nu0; jj++) eye_nu0[jj*(nu0+1)] = 1.0; double *ptrB = BBB; for(ii=0; ii<N; ii++) { d_cvt_mat2pmat(nuu[ii], nuu[ii], eye_nu0, nuu[ii], 0, hpBt[ii], cnxx[ii+1]); d_cvt_tran_mat2pmat(nxx[ii+1]-nuu[ii], nuu[ii], ptrB, nxx[ii+1]-nuu[ii], 0, hpBt[ii]+nuu[ii]*bs, cnxx[ii+1]); ptrB += nxx[ii+1] - nuu[ii]; } free(eye_nu0); //d_print_pmat(pnuu[0], cnxx[1], bs, hpBt[0], cnxx[0]); //d_print_pmat(pnuu[1], cnxx[2], bs, hpBt[1], cnxx[1]); //d_print_pmat(pnuu[2], cnxx[3], bs, hpBt[2], cnxx[2]); //d_print_pmat(pnuu[N-1], cnxx[N-1], bs, hpBt[N-2], cnxx[N-2]); //d_print_pmat(pnuu[N-1], cnxx[N], bs, hpBt[N-1], cnxx[N-1]); // R // penalty on du for(ii=0; ii<N; ii++) for(jj=0; jj<nuu[ii]; jj++) hpR[ii][jj/bs*bs*cnuu[ii]+jj%bs+jj*bs] = 0.0; //for(ii=0; ii<N; ii++) // d_print_pmat(pnuu[ii], cnuu[ii], bs, hpR[ii], pnuu[ii]); //d_print_pmat(pnuu[0], cnuu[0], bs, hpR[0], pnuu[0]); // S (zero) // Q for(ii=0; ii<=N; ii++) { // penalty on u for(jj=0; jj<nu0; jj++) hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 1.0; // penalty on x // for(jj==1; jj<nxx[ii]-nx0; jj++) // hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 0.0002; for(jj=nxx[ii]-nx0; jj<nxx[ii]; jj++) hpQ[ii][jj/bs*bs*cnxx[ii]+jj%bs+jj*bs] = 1.0; } //for(ii=0; ii<=N; ii++) // d_print_pmat(pnxx[ii], cnxx[ii], bs, hpQ2[ii], cnxx[ii]); //d_print_pmat(pnxx[0], cnxx[0], bs, hpQ2[0], cnxx[0]); //d_print_pmat(pnxx[1], cnxx[1], bs, hpQ2[1], cnxx[1]); //d_print_pmat(pnxx[N-1], cnxx[N-1], bs, hpQ2[N-1], cnxx[N-1]); //d_print_pmat(pnxx[N], cnxx[N], bs, hpQ2[N], cnxx[N]); //exit(1); // work space double *diag; d_zeros_align(&diag, pnxx[0]+pnuu[0], 1); // factorization printf("\nfactorization ...\n"); d_ric_diag_trf_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hpLK, pK, hpP, diag); printf("\nfactorization done\n\n"); #if 1 //d_print_pmat(nxx[0], nxx[0], bs, hpP[0], cnxx[0]); //d_print_pmat(nxx[1], nxx[1], bs, hpP[1], cnxx[1]); //d_print_pmat(nxx[N-2], nxx[N-2], bs, hpP[N-2], cnxx[N-2]); //d_print_pmat(nxx[N-1], nxx[N-1], bs, hpP[N-1], cnxx[N-1]); //d_print_pmat(nxx[N], nxx[N], bs, hpP[N], cnxx[N]); //for(ii=0; ii<=N; ii++) // d_print_pmat(pnuu[ii]+nxx[ii], nuu[ii], bs, hpLK[ii], cnuu[ii]); //d_print_pmat(pnuu[0]+nxx[0], nuu[0], bs, hpLK[0], cnuu[0]); //d_print_pmat(pnuu[1]+nxx[1], nuu[1], bs, hpLK[1], cnuu[1]); //d_print_pmat(pnuu[2]+nxx[2], nuu[2], bs, hpLK[2], cnuu[2]); //d_print_pmat(pnuu[N-3]+nxx[N-3], nuu[N-3], bs, hpLK[N-3], cnuu[N-3]); //d_print_pmat(pnuu[N-2]+nxx[N-2], nuu[N-2], bs, hpLK[N-2], cnuu[N-2]); //d_print_pmat(pnuu[N-1]+nxx[N-1], nuu[N-1], bs, hpLK[N-1], cnuu[N-1]); #endif // backward-forward solution // data memory space double *hrq[N+1]; double *hux[N+1]; double *hpi[N+1]; double *hPb[N]; double *hb[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hrq[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hux[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hpi[ii], pnxx[ii], 1); d_zeros_align(&hPb[ii], pnxx[ii+1], 1); d_zeros_align(&hb[ii], pnxx[ii+1], 1); } d_zeros_align(&hrq[N], pnuu[N]+pnxx[N], 1); d_zeros_align(&hux[N], pnuu[N]+pnxx[N], 1); d_zeros_align(&hpi[N], pnxx[N], 1); double *work_diag; d_zeros_align(&work_diag, pnxx[0], 1); for(ii=0; ii<=N; ii++) for(jj=0; jj<nuu[ii]; jj++) hrq[ii][jj] = 0.0; for(ii=0; ii<=N; ii++) for(jj=0; jj<nxx[ii]; jj++) hrq[ii][nuu[ii]+jj] = 0.0; for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hb[ii][jj] = 0.0; // x0 for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); printf("\nbackward-forward solution ...\n"); d_ric_diag_trs_mpc(N, nxx, nuu, hdA, hpBt, hpLK, hpP, hb, hrq, hux, 1, hPb, 1, hpi, work_diag); printf("\nbackward-forward solution done\n\n"); #if 1 printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); #endif // residuals // data memory space double *hres_rq[N+1]; double *hres_b[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hres_rq[ii], pnuu[ii]+pnxx[ii], 1); d_zeros_align(&hres_b[ii], pnxx[ii+1], 1); } d_zeros_align(&hres_rq[N], pnuu[N]+pnxx[N], 1); printf("\nresuduals ...\n"); d_res_diag_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hux, hpi, hres_rq, hres_b, work_diag); printf("\nresiduals done\n\n"); #if 1 printf("\nres_q\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); #endif // timing struct timeval tv20, tv21; #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_diag_trf_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hpLK, pK, hpP, diag); d_ric_diag_trs_mpc(N, nxx, nuu, hdA, hpBt, hpLK, hpP, hb, hrq, hux, 1, hPb, 1, hpi, work_diag); } gettimeofday(&tv21, NULL); // start time_ric_diag = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif #if 1 printf("\nRiccati full\n\n"); // size-variant full int nzz[N+1]; for(ii=0; ii<=N; ii++) nzz[ii] = nuu[ii] + nxx[ii] + 1; int pnzz[N+1]; for(ii=0; ii<=N; ii++) pnzz[ii] = (nzz[ii]+bs-1)/bs*bs; int cnzz[N+1]; for(ii=0; ii<=N; ii++) cnzz[ii] = (nzz[ii]+ncl-1)/ncl*ncl; int anzz[N+1]; for(ii=0; ii<=N; ii++) anzz[ii] = (nzz[ii]+nal-1)/nal*nal; int cnll[N+1]; for(ii=0; ii<=N; ii++) cnll[ii] = cnzz[ll]<cnxx[ll]+ncl ? cnxx[ll]+ncl : cnzz[ll]; int nzero[N+1]; for(ii=0; ii<=N; ii++) nzero[ii] = 0; double *hpBAbt_tv[N]; double *hpRSQ_tv[N+1]; double *hpL_tv[N+1]; double *hl[N+1]; for(ii=0; ii<N; ii++) { d_zeros_align(&hpBAbt_tv[ii], pnzz[ii], cnxx[ii+1]); d_zeros_align(&hpRSQ_tv[ii], pnzz[ii], cnzz[ii]); d_zeros_align(&hpL_tv[ii], pnzz[ii], cnll[ii]); d_zeros_align(&hl[ii], anzz[ii], 1); } d_zeros_align(&hpRSQ_tv[N], pnzz[N], cnzz[N]); d_zeros_align(&hpL_tv[N], pnzz[N], cnll[N]); d_zeros_align(&hl[N], anzz[ii], 1); double *work_ric_tv; d_zeros_align(&work_ric_tv, pnzz[0], cnxx[0]); for(ii=0; ii<N; ii++) { d_copy_pmat(nuu[ii], nxx[ii+1], bs, hpBt[ii], cnxx[ii], hpBAbt_tv[ii], cnxx[ii+1]); for(jj=0; jj<nxx[ii+1]; jj++) hpBAbt_tv[ii][(nuu[ii]+jj)/bs*bs*cnxx[ii+1]+(nuu[ii]+jj)%bs+jj*bs] = 1.0; for(jj=0; jj<nxx[ii+1]; jj++) hpBAbt_tv[ii][(nuu[ii]+nxx[ii])/bs*bs*cnxx[ii+1]+(nuu[ii]+nxx[ii])%bs+jj*bs] = hb[ii][jj]; //d_print_pmat(nzz[ii], nxx[ii+1], bs, hpBAbt_tv[ii], cnxx[ii+1]); } for(ii=0; ii<=N; ii++) { // R // penalty on du for(jj=0; jj<nuu[ii]; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 0.0; // Q // penalty on u for(; jj<nuu[ii]+nu0; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 1.0; // penalty on x for(jj=nuu[ii]+nxx[ii]-nx0; jj<nuu[ii]+nxx[ii]; jj++) hpRSQ_tv[ii][jj/bs*bs*cnzz[ii]+jj%bs+jj*bs] = 1.0; // r q for(jj=0; jj<nuu[ii]+nxx[ii]; jj++) hpRSQ_tv[ii][(nuu[ii]+nxx[ii])/bs*bs*cnzz[ii]+(nuu[ii]+nxx[ii])%bs+jj*bs] = hrq[ii][jj]; //d_print_pmat(nzz[ii], nzz[ii], bs, hpRSQ_tv[ii], cnzz[ii]); } printf("\nfactorization and backward-forward solution ...\n"); d_ric_sv_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpRSQ_tv, hux, hpL_tv, work_ric_tv, diag, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, dummy, nzero, dummy, dummy, dummy, 0); printf("\nfactorization and backward-forward solution done\n\n"); #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nzz[ii], nzz[ii], bs, hpL_tv[ii], cnzz[ii]); #endif printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); for(ii=0; ii<N; ii++) for(jj=0; jj<nxx[ii+1]; jj++) hux[ii+1][nuu[ii+1]+jj] = hb[ii][jj]; printf("\nbackward-forward solution ...\n"); d_ric_trs_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpL_tv, hrq, hl, hux, work_ric_tv, 1, hPb, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, nzero, dummy, dummy); printf("\nbackward-forward solution done\n\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); //exit(1); printf("\nresuduals ...\n"); d_res_diag_mpc(N, nxx, nuu, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hux, hpi, hres_rq, hres_b, work_diag); printf("\nresiduals done\n\n"); #if 1 printf("\nres_q\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); #endif #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_sv_mpc_tv(N, nxx, nuu, hpBAbt_tv, hpRSQ_tv, hux, hpL_tv, work_ric_tv, diag, COMPUTE_MULT, hpi, nzero, int_dummy, dummy, dummy, nzero, dummy, dummy, dummy, 0); } gettimeofday(&tv21, NULL); // start time_ric_full_tv = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif #endif #if 1 // IPM printf("\nIPM diag\n\n"); int kk = -1; int kmax = 50; double mu0 = 1; double mu_tol = 1e-8; double alpha_min = 1e-12; double sigma_par[] = {0.4, 0.3, 0.01}; double stat[5*50] = {}; int nbb[N+1]; nbb[0] = nu0;//nuu[0]; // XXX !!!!!!!!!!!!!! for(ii=1; ii<N; ii++) nbb[ii] = 2*nu0 + nx0; //nuu[ii] + nxx[ii]; nbb[N] = nu0 + nx0; int *(idxb[N+1]); for(ii=0; ii<=N; ii++) { idxb[ii] = (int *) malloc(nbb[ii]*sizeof(int)); } int pnbb[N+1]; for(ii=0; ii<=N; ii++) pnbb[ii] = (nbb[ii]+bs-1)/bs*bs; // data memory space double *hd[N+1]; double *hlam[N+1]; double *ht[N+1]; double *hres_d[N+1]; for(ii=0; ii<=N; ii++) { d_zeros_align(&hd[ii], 2*pnbb[ii], 1); d_zeros_align(&hlam[ii], 2*pnbb[ii], 1); d_zeros_align(&ht[ii], 2*pnbb[ii], 1); d_zeros_align(&hres_d[ii], 2*pnbb[ii], 1); } double mu = -1; //printf("\nbounds\n"); ii = 0; // initial stage ll = 0; for(jj=0; jj<nuu[ii]; jj++) { hd[ii][ll] = -20.5; hd[ii][pnbb[ii]+ll] = -20.5; idxb[ii][ll] = jj; ll++; } //d_print_mat(1, 2*pnbb[ii], hd[ii], 1); for(ii=1; ii<=N; ii++) { ll = 0; for(jj=0; jj<nuu[ii]; jj++) { hd[ii][ll] = -20.5; hd[ii][pnbb[ii]+ll] = -20.5; idxb[ii][ll] = jj; ll++; } for(; jj<nuu[ii]+nu0; jj++) { hd[ii][ll] = - 2.5; // -2.5 hd[ii][pnbb[ii]+ll] = -10.0; // -10 idxb[ii][ll] = jj; ll++; } //for(; jj<nbb[ii]-nx0; jj++) //for(; jj<nbb[ii]; jj++) //{ //hd[ii][jj] = -100.0; //hd[ii][pnbb[ii]+jj] = -100.0; //idxb[ii][ll] = jj; //ll++; //} jj += nx0*(N-ii); hd[ii][ll+0] = - 0.0; // 0 hd[ii][pnbb[ii]+ll+0] = -20.0; // -20 idxb[ii][ll] = jj; ll++; jj++; hd[ii][ll+0] = -10.0; // -10 hd[ii][pnbb[ii]+ll+0] = -10.0; // -10 idxb[ii][ll] = jj; ll++; jj++; //d_print_mat(1, 2*pnbb[ii], hd[ii], 1); } #if 0 for(ii=0; ii<=N; ii++) { for(jj=0; jj<nbb[ii]; jj++) printf("%d\t", idxb[ii][jj]); printf("\n"); } exit(1); #endif for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); int pnxM = pnxx[0]; int pnuM = pnuu[0]; int cnuM = cnuu[0]; int anxx[N+1]; for(ii=0; ii<=N; ii++) anxx[ii] = (nxx[ii]+nal-1)/nal*nal; int anuu[N+1]; for(ii=0; ii<=N; ii++) anuu[ii] = (nuu[ii]+nal-1)/nal*nal; int work_space_ip_double = 0; for(ii=0; ii<=N; ii++) work_space_ip_double += anuu[ii] + 3*anxx[ii] + (pnuu[ii]+pnxx[ii])*cnuu[ii] + pnxx[ii]*cnxx[ii] + 12*pnbb[ii]; work_space_ip_double += pnxM*cnuM + pnxM + pnuM; int work_space_ip_int = (N+1)*7*sizeof(int); work_space_ip_int = (work_space_ip_int+63)/64*64; work_space_ip_int /= sizeof(int); printf("\nIPM diag work space size: %d double + %d int\n\n", work_space_ip_double, work_space_ip_int); double *work_space_ip; d_zeros_align(&work_space_ip, work_space_ip_double+(work_space_ip_int+1)/2, 1); // XXX assume sizeof(double) = 2 * sizeof(int) !!!!! printf("\nIPM solution ...\n"); d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); printf("\nIPM solution done\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); printf("\nlam\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hlam[ii], 1); d_print_mat(1, nbb[ii], hlam[ii]+pnbb[ii], 1); } printf("\nt\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], ht[ii], 1); d_print_mat(1, nbb[ii], ht[ii]+pnbb[ii], 1); } printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); // residuals printf("\nresuduals IPM ...\n"); d_res_ip_diag_mpc(N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hrq, hd, hux, hpi, hlam, ht, hres_rq, hres_b, hres_d, &mu, work_diag); printf("\nresiduals IPM done\n"); printf("\nres_rq\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); printf("\nres_d\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hres_d[ii], 1); d_print_mat(1, nbb[ii], hres_d[ii]+pnbb[ii], 1); } printf("\nres_mu\n"); d_print_mat(1, 1, &mu, 1); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_diag = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); // simulation printf("\nsimulation ...\n\n"); nrep = 15; for(ii=0; ii<nrep; ii++) { d_ip2_diag_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, hdA, hpBt, hpR, hpS, hpQ, hb, hd, hrq, hux, 1, hpi, hlam, ht, work_space_ip); dgemv_t_lib(nuu[0], nxx[0], hpBt[0], cnxx[0], hux[0], hux[0]+nuu[0], 1); for(jj=0; jj<nxx[0]-nx0-nu0; jj++) hux[0][nuu[0]+nxx[0]-jj-1] = hux[0][nuu[0]+nxx[0]-jj-1-nx0]; printf("\nsimulation step = %d, IPM iterations = %d, mu = %e\n\n", ii, kk, stat[5*(kk-1)+4]); d_print_mat(1, nuu[0]+nxx[0], hux[0], 1); } printf("\nsimulation done\n\n"); //exit(1); #if 1 // IPM printf("\nIPM full\n\n"); int ngg[N+1]; for(ii=0; ii<=N; ii++) ngg[ii] = 0; int pngg[N+1]; for(ii=0; ii<=N; ii++) pngg[ii] = (ngg[ii]+bs-1)/bs*bs; //int pnzM = pnzz[0]; // max //int cnxgM = cnxx[0]; // max //int work_space_int_size = 7*(N+1); //int work_space_double_size = pnzM*cnxgM + pnzM; //for(ii=0; ii<=N; ii++) // work_space_double_size += pnzz[ii]*cnll[ii] + 3*anzz[ii] + 2*anxx[ii] + 14*pnbb[ii] + 10*pngg[ii]; //printf("\nIPM diag work space size: %d double + %d int\n\n", work_space_double_size, work_space_int_size); //double *work_ipm_tv_double; d_zeros_align(&work_ipm_tv_double, work_space_double_size, 1); double *work_ipm_tv_double; d_zeros_align(&work_ipm_tv_double, d_ip2_hard_mpc_tv_work_space_size_double(N, nxx, nuu, nbb, ngg), 1); //int *work_ipm_tv_int = (int *) malloc(work_space_int_size*sizeof(int)); int *work_ipm_tv_int = (int *) malloc(d_ip2_hard_mpc_tv_work_space_size_int(N, nxx, nuu, nbb, ngg)*sizeof(int)); for(jj=0; jj<nuu[0]; jj++) { hux[0][jj] = 0.0; } for(; jj<nuu[0]+nu0; jj++) { hux[0][jj] = 7.5097; } for(; jj<nxx[0]; jj+=2) { hux[0][jj+0] = 15.01940; hux[0][jj+1] = 0.0; } //d_print_mat(1, nuu[0]+nxx[0], hux2[0], 1); printf("\nIPM solution ...\n"); d_ip2_hard_mpc_tv(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, dummy, hd, hux, 1, hpi, hlam, ht, work_ipm_tv_double, work_ipm_tv_int); printf("\nIPM solution done\n"); printf("\nux\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hux[ii], 1); printf("\nlam\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hlam[ii], 1); d_print_mat(1, nbb[ii], hlam[ii]+pnbb[ii], 1); } printf("\nt\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], ht[ii], 1); d_print_mat(1, nbb[ii], ht[ii]+pnbb[ii], 1); } printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); printf("\nresiduals ...\n\n"); d_res_ip_hard_mpc_tv(N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, hrq, hux, dummy, hd, hpi, hlam, ht, hres_rq, hres_b, hres_d, &mu); printf("\nresiduals dones\n\n"); printf("\nres_rq\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nuu[ii]+nxx[ii], hres_rq[ii], 1); printf("\nres_b\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nxx[ii+1], hres_b[ii], 1); printf("\nres_d\n"); for(ii=0; ii<=N; ii++) { d_print_mat(1, nbb[ii], hres_d[ii], 1); d_print_mat(1, nbb[ii], hres_d[ii]+pnbb[ii], 1); } printf("\nres_mu\n"); d_print_mat(1, 1, &mu, 1); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_hard_mpc_tv(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, N, nxx, nuu, nbb, idxb, ngg, hpBAbt_tv, hpRSQ_tv, dummy, hd, hux, 1, hpi, hlam, ht, work_ipm_tv_double, work_ipm_tv_int); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_full_tv = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); free(work_ric_tv); free(work_ipm_tv_double); free(work_ipm_tv_int); for(ii=0; ii<N; ii++) { free(hpBAbt_tv[ii]); free(hpRSQ_tv[ii]); free(hpL_tv[ii]); free(hl[ii]); } free(hpRSQ_tv[N]); free(hpL_tv[N]); free(hl[N]); //exit(1); #endif // free memory for(ii=0; ii<=N; ii++) { free(idxb[ii]); free(hd[ii]); free(hlam[ii]); free(ht[ii]); } free(work_space_ip); #endif for(ii=0; ii<N; ii++) { free(hdA[ii]); free(hpBt[ii]); free(hpR[ii]); free(hpS[ii]); free(hpQ[ii]); free(hpLK[ii]); free(hpP[ii]); free(hrq[ii]); free(hux[ii]); free(hpi[ii]); free(hPb[ii]); free(hb[ii]); free(hres_rq[ii]); free(hres_b[ii]); } free(hpQ[N]); free(hpP[N]); free(pK); free(hrq[N]); free(hux[N]); free(hpi[N]); free(work_diag); free(hres_rq[N]); /************************************************ * test of normal riccati & IPM ************************************************/ printf("\nRiccati full\n\n"); nx = 25; nu = 1; N = 11; int rep; int nz = nx+nu+1; int anz = nal*((nz+nal-1)/nal); int anx = nal*((nx+nal-1)/nal); int pnz = bs*((nz+bs-1)/bs); int pnx = bs*((nx+bs-1)/bs); int pnu = bs*((nu+bs-1)/bs); int cnz = ncl*((nx+nu+1+ncl-1)/ncl); int cnx = ncl*((nx+ncl-1)/ncl); int cnu = ncl*((nu+ncl-1)/ncl); int cnl = cnz<cnx+ncl ? cnx+ncl : cnz; const int ncx = nx; #if 1 double *BAb_temp; d_zeros(&BAb_temp, nx, nu+nx+1); double *hpBAbt2[N]; ptrB = BBB; for(ii=0; ii<N; ii++) { //printf("\n%d\n", ii); d_zeros_align(&hpBAbt2[ii], pnz, cnx); for(jj=0; jj<nx*(nx+nu+1); jj++) BAb_temp[jj] = 0.0; for(jj=0; jj<nu; jj++) BAb_temp[jj*(nx+1)] = 1.0; d_copy_mat(nxx[ii+1]-1, nuu[ii], ptrB, nxx[ii+1]-1, BAb_temp+1, nx); ptrB += nxx[ii+1]-1; for(jj=0; jj<nxx[ii+1]; jj++) BAb_temp[nuu[ii]*nx+jj*(nx+1)] = 1.0; //for(jj=0; jj<nxx[ii+1]; jj++) BAb_temp[(nuu[ii]+nxx[ii+1])*nx+jj] = 1.0; //d_print_mat(nx, nu+nx+1, BAb_temp, nx); d_cvt_tran_mat2pmat(nx, nx+nu+1, BAb_temp, nx, 0, hpBAbt2[ii], cnx); //d_print_pmat(nx+nu+1, nx, bs, hpBAbt2[ii], cnx); } double *RSQ; d_zeros(&RSQ, nz, nz); double *hpRSQ[N+1]; for(ii=0; ii<=N; ii++) { //printf("\n%d\n", ii); d_zeros_align(&hpRSQ[ii], pnz, cnz); for(jj=0; jj<nz*nz; jj++) RSQ[jj] = 0.0; for(jj=nu; jj<2*nu; jj++) RSQ[jj*(nz+1)] = 1.0; for(jj=nu+nxx[ii]-nx0; jj<nu+nxx[ii]; jj++) RSQ[jj*(nz+1)] = 1.0; d_cvt_mat2pmat(nz, nz, RSQ, nz, 0, hpRSQ[ii], cnz); //d_print_pmat(nz, nz, bs, hpRSQ[ii], cnz); } double *hpL[N+1]; double *hq2[N+1]; double *hux2[N+1]; double *hpi2[N+1]; double *hPb2[N]; for(jj=0; jj<N; jj++) { d_zeros_align(&hq2[jj], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpL[jj], pnz, cnl); d_zeros_align(&hux2[jj], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpi2[jj], pnx, 1); d_zeros_align(&hPb2[jj], pnx, 1); } d_zeros_align(&hpL[N], pnz, cnl); d_zeros_align(&hq2[N], pnz, 1); // it has to be pnz !!! d_zeros_align(&hux2[N], pnz, 1); // it has to be pnz !!! d_zeros_align(&hpi2[N], pnx, 1); //double *work; d_zeros_align(&work, 2*anz, 1); double *work; d_zeros_align(&work, pnz, cnx); for(jj=0; jj<nx+nu; jj++) hux2[0][jj] = 0.0; for(jj=0; jj<nu; jj++) { hux2[0][nu+jj] = 7.5097; } for(; jj<nx; jj+=2) { hux2[0][nu+jj+0] = 15.01940; hux2[0][nu+jj+1] = 0.0; } printf("\nfactorization and backward-forward solution ...\n"); d_ric_sv_mpc(nx, nu, N, hpBAbt2, hpRSQ, 0, dummy, dummy, hux2, hpL, work, diag, COMPUTE_MULT, hpi2, 0, 0, 0, dummy, dummy, dummy, 0); printf("\nfactorization and backward-forward solution done\n\n"); //for(ii=0; ii<=N; ii++) // d_print_pmat(pnz, cnl-3, bs, hpL[ii], cnl); //d_print_pmat(pnz, nu, bs, hpL[0], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[1], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[2], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-3], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-2], cnl); //d_print_pmat(pnz, cnl-3, bs, hpL[N-1], cnl); //d_print_pmat(pnz, cnl, bs, hpL[N], cnl); #if 1 printf("\nux Riccati full\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, hux2[ii], 1); #endif // residuals double *hres_rq2[N+1]; double *hres_b2[N]; for(ii=0; ii<N; ii++) { d_zeros_align(&hres_rq2[ii], pnz, 1); d_zeros_align(&hres_b2[ii], pnx, 1); } d_zeros_align(&hres_rq2[N], pnz, 1); printf("\nresuduals ...\n"); d_res_mpc(nx, nu, N, hpBAbt2, hpRSQ, hq2, hux2, hpi2, hres_rq2, hres_b2); printf("\nresiduals done\n\n"); printf("\nres_q full\n"); d_print_mat(1, nu, hres_rq2[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nx+nu, hres_rq2[ii], 1); printf("\nres_b full\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx, hres_b2[ii], 1); // timing //struct timeval tv20, tv21; #if 1 printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 10000; for(ii=0; ii<nrep; ii++) { d_ric_sv_mpc(nx, nu, N, hpBAbt2, hpRSQ, 0, dummy, dummy, hux2, hpL, work, diag, COMPUTE_MULT, hpi2, 0, 0, 0, dummy, dummy, dummy, 0); } gettimeofday(&tv21, NULL); // start time_ric_full = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); printf("\ntiming done\n\n"); #endif printf("\nIPM full\n\n"); int nb = nu+nx; int ng = 0; int ngN = 0; int pnb = (nb+bs-1)/bs*bs; int png = (ng+bs-1)/bs*bs; int pngN = (ngN+bs-1)/bs*bs; double *hd2[N+1]; double *hlam2[N+1]; double *ht2[N+1]; for(ii=0; ii<N; ii++) { d_zeros_align(&hd2[ii], 2*pnb+2*png, 1); d_zeros_align(&hlam2[ii],2*pnb+2*png, 1); d_zeros_align(&ht2[ii], 2*pnb+2*png, 1); } d_zeros_align(&hd2[N], 2*pnb+2*pngN, 1); d_zeros_align(&hlam2[N],2*pnb+2*pngN, 1); d_zeros_align(&ht2[N], 2*pnb+2*pngN, 1); // work space // more than enought !!!!! double *work_ipm_full; d_zeros_align(&work_ipm_full, hpmpc_ip_hard_mpc_dp_work_space(N, nx, nu, nb, ng, ngN), 1); // bounds for(ii=0; ii<=N; ii++) { for(jj=0; jj<nu; jj++) { hd2[ii][jj] = -20.5; hd2[ii][pnb+jj] = -20.5; } for(; jj<2*nu; jj++) { hd2[ii][jj] = - 2.5; hd2[ii][pnb+jj] = -10.0; } for(; jj<2*nu+(N-ii)*nx0; jj++) { hd2[ii][jj] = -100.0; hd2[ii][pnb+jj] = -100.0; } hd2[ii][jj+0] = 0.0; hd2[ii][pnb+jj+0] = -20.0; hd2[ii][jj+1] = -10.0; hd2[ii][pnb+jj+1] = -10.0; jj += 2; for(; jj<nu+nx; jj++) { hd2[ii][jj] = -100.0; hd2[ii][pnb+jj] = -100.0; } //d_print_mat(1, nb, hd2[ii], 1); //d_print_mat(1, nb, hd2[ii]+pnb, 1); } //exit(1); printf("\nIPM full solve ...\n\n"); d_ip2_hard_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, nx, nu, N, nb, ng, ngN, hpBAbt2, hpRSQ, dummy, hd2, hux2, 1, hpi2, hlam2, ht2, work_ipm_full); printf("\nIPM full solve done\n\n"); #if 1 printf("\nux IPM full\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx+nu, hux2[ii], 1); #endif printf("\nstatistics\n\n"); for(ii=0; ii<kk; ii++) printf("%d\t%f\t%f\t%f\t%e\t%f\t%f\t%e\n", ii+1, stat[5*ii+0], stat[5*ii+1], stat[5*ii+2], stat[5*ii+2], stat[5*ii+3], stat[5*ii+4], stat[5*ii+4]); printf("\n\n"); // timing printf("\ntiming ...\n\n"); gettimeofday(&tv20, NULL); // start nrep = 1000; for(ii=0; ii<nrep; ii++) { d_ip2_hard_mpc(&kk, kmax, mu0, mu_tol, alpha_min, 0, sigma_par, stat, nx, nu, N, nb, ng, ngN, hpBAbt2, hpRSQ, dummy, hd2, hux2, 1, hpi2, hlam2, ht2, work_ipm_full); } gettimeofday(&tv21, NULL); // start printf("\ntiming done\n\n"); time_ip_full = (float) (tv21.tv_sec-tv20.tv_sec)/(nrep+0.0)+(tv21.tv_usec-tv20.tv_usec)/(nrep*1e6); // free memory free(work_ipm_full); for(ii=0; ii<N; ii++) { free(hd2[ii]); free(hlam2[ii]); free(ht2[ii]); } free(hd2[N]); free(hlam2[N]); free(ht2[N]); // free memory free(work); free(RSQ); free(BAb_temp); for(ii=0; ii<N; ii++) { free(hpBAbt2[ii]); free(hpRSQ[ii]); free(hpL[ii]); free(hux2[ii]); free(hpi2[ii]); free(hq2[ii]); free(hPb2[ii]); free(hres_rq2[ii]); free(hres_b2[ii]); } free(hpRSQ[N]); free(hpL[N]); free(hux2[N]); free(hpi2[N]); free(hq2[N]); free(hres_rq2[N]); #endif printf("\nric diag time = %e\t\tric full time = %e\t\tric full tv time = %e\t\tip diag time = %e\t\tip full time = %e\t\tip full tv time = %e\n\n", time_ric_diag, time_ric_full, time_ric_full_tv, time_ip_diag, time_ip_full, time_ip_full_tv); #endif }
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); printf("Riccati solver performance test - single precision\n"); printf("\n"); // maximum frequency of the processor const float GHz_max = 2.9; //3.6; //2.9; printf("Frequency used to compute theoretical peak: %5.1f GHz (edit test_dricposv.c to modify this value).\n", GHz_max); printf("\n"); // maximum flops per cycle, single precision #if defined(TARGET_X64_AVX) const float flops_max = 16; printf("Testing solvers for AVX instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) const float flops_max = 8; printf("Testing solvers for SSE3 instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEXA9) const float flops_max = 4; printf("Testing solvers for ARMv7a NEON instruction set: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X86_ATOM) const float flops_max = 4; printf("Testing solvers for SSE3 instruction set, 32 bit, optimized for Intel Atom: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_POWERPC_G2) const float flops_max = 2; printf("Testing solvers for POWERPC instruction set, 32 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4) const float flops_max = 2; printf("Testing reference solvers, 4x4 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_2X2) const float flops_max = 2; printf("Testing reference solvers, 2x2 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #endif printf("\n"); printf("Tested solvers:\n"); printf("-sv : Riccati factorization and system solution (prediction step in IP methods)\n"); printf("-trs: system solution after a previous call to Riccati factorization (correction step in IP methods)\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) printf("\nflush to zero on\n"); _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif // to throw floating-point exception /*#ifndef __APPLE__*/ /* feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);*/ /*#endif*/ int err; int i, j, ii, jj, idx; const int bsd = D_MR; //d_get_mr(); const int bss = S_MR; //s_get_mr(); int info = 0; int nn[] = {4, 6, 8, 10, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300}; int nnrep[] = {10000, 10000, 10000, 10000, 10000, 4000, 4000, 2000, 2000, 1000, 1000, 400, 400, 400, 200, 200, 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 40, 40, 40, 40, 40, 20, 20, 20, 20, 20, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}; int vnx[] = {8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 1024}; int vnrep[] = {100, 100, 100, 100, 100, 100, 50, 50, 50, 20, 10, 10}; int vN[] = {4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256}; int ll; for(ll=0; ll<77; ll++) /* for(ll=0; ll<1; ll++)*/ { int nx = nn[ll];//NX;//16;//nn[ll]; // number of states (it has to be even for the mass-spring system test problem) int nu = 2;//NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = 10;//NN;//10; // horizon lenght int nrep = nnrep[ll]; /* int nx = NX;//16;//nn[ll]; // number of states (it has to be even for the mass-spring system test problem)*/ /* int nu = NU;//5; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem)*/ /* int N = NN;//10; // horizon lenght*/ /* int nrep = NREP;*/ int rep; int nz = nx+nu+1; int pnz = bss*((nz+bss-nu%bss+bss-1)/bss); /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.1; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 3.5; x0[1] = 3.5; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nu, B, nx); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); /* packed */ double *BAb; d_zeros(&BAb, nx, nz); dmcopy(nx, nu, B, nx, BAb, nx); dmcopy(nx, nx, A, nx, BAb+nu*nx, nx); dmcopy(nx, 1 , b, nx, BAb+(nu+nx)*nx, nx); // d_print_mat(nx, nx+nu+1, BAb, nx); /* transposed */ double *BAbt; d_zeros_align(&BAbt, pnz, pnz); for(ii=0; ii<nx; ii++) for(jj=0; jj<nz; jj++) { BAbt[jj+pnz*ii] = BAb[ii+nx*jj]; } // d_print_mat(nz, nx+1, BAbt, pnz); // s_print_mat(nz, nx+1, sBAbt, pnz); // return 0; /* packed into contiguous memory */ double *pBAbt; d_zeros_align(&pBAbt, pnz, pnz); d_cvt_mat2pmat(nz, nx, 0, bsd, BAbt, pnz, pBAbt, pnz); float *psBAbt; s_zeros_align(&psBAbt, pnz, pnz); s_cvt_d2s_pmat(nz, nx, bsd, pBAbt, pnz, bss, psBAbt, pnz); // d_print_pmat(nz, nx, bsd, pBAbt, pnz); // s_print_pmat(nz, nx, bss, spBAbt, pnz); /************************************************ * cost function ************************************************/ double *Q; d_zeros_align(&Q, pnz, pnz); for(ii=0; ii<nu; ii++) Q[ii*(pnz+1)] = 2.0; for(; ii<pnz; ii++) Q[ii*(pnz+1)] = 1.0; for(ii=0; ii<nz; ii++) Q[nx+nu+ii*pnz] = 1.0; Q[(nx+nu)*(pnz+1)] = 1e6; /* packed into contiguous memory */ float *pQ; s_zeros_align(&pQ, pnz, pnz); cvt_d2s_mat2pmat(nz, nz, 0, bss, Q, pnz, pQ, pnz); /* matrices series */ float *(hpQ[N+1]); float *(hq[N+1]); float *(hux[N+1]); float *(hpi[N+1]); float *(hpBAbt[N]); float *(hrb[N]); float *(hrq[N+1]); for(jj=0; jj<N; jj++) { s_zeros_align(&hpQ[jj], pnz, pnz); s_zeros_align(&hq[jj], pnz, 1); s_zeros_align(&hux[jj], pnz, 1); s_zeros_align(&hpi[jj], nx, 1); hpBAbt[jj] = psBAbt; s_zeros_align(&hrb[jj], nx, 1); s_zeros_align(&hrq[jj], nx+nu, 1); } s_zeros_align(&hpQ[N], pnz, pnz); s_zeros_align(&hq[N], pnz, 1); s_zeros_align(&hux[N], pnz, 1); s_zeros_align(&hpi[N], nx, 1); s_zeros_align(&hrq[N], nx+nu, 1); // starting guess for(jj=0; jj<nx; jj++) hux[0][nu+jj] = (float) x0[jj]; float *pL; s_zeros_align(&pL, pnz, pnz); float *pBAbtL; s_zeros_align(&pBAbtL, pnz, pnz); /************************************************ * riccati-like iteration ************************************************/ // predictor // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // call the solver sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info); if(PRINTRES==1) { /* print result */ printf("\n\nsv\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nu, hux[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\nsv\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hpi[ii+1], 1); } // corrector // clear solution for(ii=0; ii<N; ii++) { for(jj=0; jj<nu; jj++) hux[ii][jj] = 0; for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0; } // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // call the solver sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi); if(PRINTRES==1) { // print result printf("\n\ntrs\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nu, hux[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\ntrs\n\n"); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hpi[ii+1], 1); } // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // residuals computation sres(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, hpi, hrq, hrb); if(PRINTRES==1 && COMPUTE_MULT==1) { // print result printf("\n\nres\n\n"); for(ii=0; ii<+N; ii++) s_print_mat(1, nx+nu, hrq[ii], 1); for(ii=0; ii<N; ii++) s_print_mat(1, nx, hrb[ii], 1); } // timing struct timeval tv0, tv1, tv2; gettimeofday(&tv0, NULL); // start // double precision for(rep=0; rep<nrep; rep++) { // restore cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*pnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*pnz; jj++) hpQ[N][jj]=pQ[jj]; // call the solver sricposv_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hux, pL, pBAbtL, COMPUTE_MULT, hpi, &info); } gettimeofday(&tv1, NULL); // start for(rep=0; rep<nrep; rep++) { // clear solution for(ii=0; ii<N; ii++) { for(jj=0; jj<nu; jj++) hux[ii][jj] = 0; for(jj=0; jj<nx; jj++) hux[ii+1][nu+jj] = 0; } // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // call the solver sricpotrs_mpc(nx, nu, N, pnz, hpBAbt, hpQ, hq, hux, pBAbtL, COMPUTE_MULT, hpi); } gettimeofday(&tv2, NULL); // start float time_sv = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); float flop_sv = (1.0/3.0*nx*nx*nx+3.0/2.0*nx*nx) + N*(7.0/3.0*nx*nx*nx+4.0*nx*nx*nu+2.0*nx*nu*nu+1.0/3.0*nu*nu*nu+13.0/2.0*nx*nx+9.0*nx*nu+5.0/2.0*nu*nu); if(COMPUTE_MULT==1) flop_sv += N*2*nx*nx; float Gflops_sv = 1e-9*flop_sv/time_sv; float time_trs = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6); float flop_trs = N*(8.0*nx*nx+8.0*nx*nu+2.0*nu*nu); if(COMPUTE_MULT==1) flop_trs += N*2*nx*nx; float Gflops_trs = 1e-9*flop_trs/time_trs; float Gflops_max = flops_max * GHz_max; if(ll==0) printf("\nnx\tnu\tN\tsv time\t\tsv Gflops\tsv \%\t\ttrs time\ttrs Gflops\ttrs \%\n\n"); printf("%d\t%d\t%d\t%e\t%f\t%f\t%e\t%f\t%f\n", nx, nu, N, time_sv, Gflops_sv, 100.0*Gflops_sv/Gflops_max, time_trs, Gflops_trs, 100.0*Gflops_trs/Gflops_max); /************************************************ * return ************************************************/ free(A); free(B); free(b); free(x0); free(BAb); free(BAbt); free(pBAbt); free(Q); free(pQ); free(pL); free(pBAbtL); for(jj=0; jj<N; jj++) { free(hpQ[jj]); free(hq[jj]); free(hux[jj]); free(hpi[jj]); } free(hpQ[N]); free(hq[N]); free(hux[N]); free(hpi[N]); } // increase size printf("\n"); printf("\n"); printf("\n"); return 0; }
int main() { #if defined(REF_BLAS_OPENBLAS) openblas_set_num_threads(1); #endif #if defined(REF_BLAS_BLIS) omp_set_num_threads(1); #endif printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); printf("Riccati solver performance test - double precision\n"); printf("\n"); // maximum frequency of the processor const float GHz_max = GHZ_MAX; printf("Frequency used to compute theoretical peak: %5.1f GHz (edit test_param.h to modify this value).\n", GHz_max); printf("\n"); // maximum flops per cycle, double precision #if defined(TARGET_X64_AVX2) const float flops_max = 16; printf("Testing solvers for AVX & FMA3 instruction sets, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_AVX) const float flops_max = 8; printf("Testing solvers for AVX instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) const float flops_max = 4; printf("Testing solvers for SSE3 instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A15) const float flops_max = 2; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A15: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A9) const float flops_max = 1; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A9: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A7) const float flops_max = 0.5; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A7: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X86_ATOM) const float flops_max = 1; printf("Testing solvers for SSE3 instruction set, 32 bit, optimized for Intel Atom: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_POWERPC_G2) const float flops_max = 1; printf("Testing solvers for POWERPC instruction set, 32 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4) const float flops_max = 2; printf("Testing reference solvers, 4x4 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4_PREFETCH) const float flops_max = 2; printf("Testing reference solvers, 4x4 kernel with register prefetch: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_2X2) const float flops_max = 2; printf("Testing reference solvers, 2x2 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #endif FILE *f; f = fopen("./test_problems/results/test_blas.m", "w"); // a #if defined(TARGET_X64_AVX2) fprintf(f, "C = 'd_x64_avx2';\n"); fprintf(f, "\n"); #elif defined(TARGET_X64_AVX) fprintf(f, "C = 'd_x64_avx';\n"); fprintf(f, "\n"); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) fprintf(f, "C = 'd_x64_sse3';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A9) fprintf(f, "C = 'd_ARM_cortex_A9';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A7) fprintf(f, "C = 'd_ARM_cortex_A7';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A15) fprintf(f, "C = 'd_ARM_cortex_A15';\n"); fprintf(f, "\n"); #elif defined(TARGET_X86_ATOM) fprintf(f, "C = 'd_x86_atom';\n"); fprintf(f, "\n"); #elif defined(TARGET_POWERPC_G2) fprintf(f, "C = 'd_PowerPC_G2';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_4X4) fprintf(f, "C = 'd_c99_4x4';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_4X4_PREFETCH) fprintf(f, "C = 'd_c99_4x4';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_2X2) fprintf(f, "C = 'd_c99_2x2';\n"); fprintf(f, "\n"); #endif fprintf(f, "A = [%f %f];\n", GHz_max, flops_max); fprintf(f, "\n"); fprintf(f, "B = [\n"); printf("\n"); printf("Tested solvers:\n"); printf("-sv : Riccati factorization and system solution (prediction step in IP methods)\n"); printf("-trs: system solution after a previous call to Riccati factorization (correction step in IP methods)\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) /* printf("\nflush to zero on\n");*/ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif // to throw floating-point exception /*#ifndef __APPLE__*/ /* feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);*/ /*#endif*/ int ii, jj; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line int nn[] = {4, 6, 8, 10, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300}; int nnrep[] = {10000, 10000, 10000, 10000, 10000, 4000, 4000, 2000, 2000, 1000, 1000, 400, 400, 400, 200, 200, 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 40, 40, 40, 40, 40, 20, 20, 20, 20, 20, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10}; int vnx[] = {8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 512, 1024}; int vnrep[] = {100, 100, 100, 100, 100, 100, 50, 50, 50, 20, 10, 10}; int vN[] = {4, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256}; int nx, nw, ny, ndN, N, nrep, Ns; int diag_R; int ll; // int ll_max = 77; int ll_max = 1; for(ll=0; ll<ll_max; ll++) { FILE* fid; double* yy; float* yy_temp; if(1) { fid = fopen("./test_problems/mhe_measure.dat", "r"); if(fid==NULL) exit(-1); //printf("\nhola\n"); int dummy_int = fscanf(fid, "%d %d %d %d", &nx, &nw, &ny, &Ns); //printf("\n%d %d %d %d\n", nx, nw, ny, Ns); yy_temp = (float*) malloc(ny*Ns*sizeof(float)); yy = (double*) malloc(ny*Ns*sizeof(double)); for(jj=0; jj<ny*Ns; jj++) { dummy_int = fscanf(fid, "%e", &yy_temp[jj]); yy[jj] = (double) yy_temp[jj]; //printf("\n%f", yy[jj]); } //printf("\n"); fclose(fid); #if 1 N = 15; //Ns-1; // NN; nrep = NREP;//nnrep[ll]; nx = 12;//nn[ll]; nw = 5;//nn[ll]; ny = 3; ndN = 0; //2; diag_R = 0; #else N = 10; //Ns-1; // NN; nrep = nnrep[ll]; nx = nn[ll]; nw = nn[ll]; ny = 3; ndN = 0; diag_R = 0; #endif //printf("\nnx = %d; nw = %d; ny = %d; ndN = %d; N = %d\n\n", nx, nw, ny, ndN, N); } else if(ll_max==1) { nx = NX; // number of states (it has to be even for the mass-spring system test problem) nw = NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) ny = nx/2; // size of measurements vector N = NN; // horizon lenght nrep = NREP; } else { nx = nn[ll]; // number of states (it has to be even for the mass-spring system test problem) nw = 2; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) ny = nx/2; // size of measurements vector N = 10; // horizon lenght nrep = nnrep[ll]; } int rep; const int nz = nx+ny; // TODO delete const int nwx = nw+nx; const int anz = nal*((nz+nal-1)/nal); const int anx = nal*((nx+nal-1)/nal); const int anw = nal*((nw+nal-1)/nal); const int any = nal*((ny+nal-1)/nal); const int pnz = bs*((nz+bs-1)/bs); const int pnx = bs*((nx+bs-1)/bs); const int pnw = bs*((nw+bs-1)/bs); const int pny = bs*((ny+bs-1)/bs); const int pnx2 = bs*((2*nx+bs-1)/bs); const int pnwx = bs*((nw+nx+bs-1)/bs); const int cnz = ncl*((nz+ncl-1)/ncl); const int cnx = ncl*((nx+ncl-1)/ncl); const int cnw = ncl*((nw+ncl-1)/ncl); const int cny = ncl*((ny+ncl-1)/ncl); const int cnx2 = 2*(ncl*((nx+ncl-1)/ncl)); const int cnwx = ncl*((nw+nx+ncl-1)/ncl); const int cnwx1 = ncl*((nw+nx+1+ncl-1)/ncl); const int cnf = cnz<cnx+ncl ? cnx+ncl : cnz; const int pad = (ncl-(nx+nw)%ncl)%ncl; // packing between AGL & P const int cnl = nx+nw+pad+cnx; const int pad2 = (ncl-(nx)%ncl)%ncl; // packing between AGL & P const int cnl2 = cnz<cnx+ncl ? nx+pad2+cnx+ncl : nx+pad2+cnz; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nw); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nw, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.0; for(jj=0; jj<nx; jj++) x0[jj] = 0.0; x0[0] = 3.5; x0[1] = 3.5; double *C; d_zeros(&C, ny, nx); // inputs matrix for(jj=0; jj<ny; jj++) C[jj*(ny+1)] = 1.0; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nw, B, nx); // d_print_mat(ny, nx, C, ny); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); /* packed into contiguous memory */ double *pA; d_zeros_align(&pA, pnx, cnx); d_cvt_mat2pmat(nx, nx, A, nx, 0, pA, cnx); double *pG; d_zeros_align(&pG, pnx, cnw); d_cvt_mat2pmat(nx, nw, B, nx, 0, pG, cnw); double *pC; d_zeros_align(&pC, pny, cnx); d_cvt_mat2pmat(ny, nx, C, ny, 0, pC, cnx); double *pCA; d_zeros_align(&pCA, pnz, cnx); d_cvt_mat2pmat(ny, nx, C, ny, 0, pCA, cnx); d_cvt_mat2pmat(nx, nx, A, nx, ny, pCA+(ny/bs)*bs+ny%bs, cnx); // d_print_pmat(nx, nx, bs, pA, cnx); // d_print_pmat(nx, nw, bs, pG, cnw); // d_print_pmat(ny, nx, bs, pC, cnx); /************************************************ * cost function ************************************************/ double *R; d_zeros(&R, nw, nw); for(jj=0; jj<nw; jj++) R[jj*(nw+1)] = 1.0; double *Q; d_zeros(&Q, ny, ny); for(jj=0; jj<ny; jj++) Q[jj*(ny+1)] = 1.0; double *Qx; d_zeros(&Qx, nx, nx); for(jj=0; jj<ny; jj++) for(ii=0; ii<ny; ii++) Qx[ii+nx*jj] = Q[ii+ny*jj]; double *L0; d_zeros(&L0, nx, nx); for(jj=0; jj<nx; jj++) L0[jj*(nx+1)] = 1.0; double *q; d_zeros_align(&q, any, 1); for(jj=0; jj<ny; jj++) q[jj] = 0.0; double *r; d_zeros_align(&r, anw, 1); for(jj=0; jj<nw; jj++) r[jj] = 1.0; double *f; d_zeros_align(&f, anx, 1); for(jj=0; jj<nx; jj++) f[jj] = jj;//1.0; //b[jj]; //1.0; /* packed into contiguous memory */ double *pR; d_zeros_align(&pR, pnw, cnw); d_cvt_mat2pmat(nw, nw, R, nw, 0, pR, cnw); double *pQ; d_zeros_align(&pQ, pny, cny); d_cvt_mat2pmat(ny, ny, Q, ny, 0, pQ, cny); // d_print_pmat(nw, nw, bs, pQ, cnw); // d_print_pmat(ny, ny, bs, pR, cny); /************************************************ * compound quantities ************************************************/ double *pRG; d_zeros_align(&pRG, pnwx, cnw); d_cvt_mat2pmat(nw, nw, R, nw, 0, pRG, cnw); d_cvt_mat2pmat(nx, nw, B, nx, nw, pRG+(nw/bs)*bs*cnw+nw%bs, cnw); //d_print_pmat(nw+nx, nw, bs, pRG, cnw); double *pQA; d_zeros_align(&pQA, pnx2, cnx); d_cvt_mat2pmat(ny, ny, Q, ny, 0, pQA, cnx); d_cvt_mat2pmat(nx, nx, A, nx, nx, pQA+(nx/bs)*bs*cnx+nx%bs, cnx); //d_print_pmat(2*nx, cnx, bs, pQA, cnx); //exit(1); /************************************************ * series of matrices ************************************************/ double *(hpA[N]); double *(hpCA[N]); double *(hpG[N]); double *(hpC[N+1]); double *(hpR[N]); double *(hpQ[N+1]); double *(hpLp[N+1]); double *(hdLp[N+1]); double *(hpLp2[N+1]); double *(hpLe[N+1]); double *(hq[N]); double *(hr[N+1]); double *(hf[N]); double *(hxe[N+1]); double *(hxp[N+1]); double *(hw[N]); double *(hy[N+1]); double *(hlam[N]); double *(hpRG[N]); double *(hpQA[N+1]); double *(hpGLr[N]); double *(hpALe[N+1]); double *(hrr[N]); double *(hqq[N+1]); double *(hff[N+1]); double *p_hrr; d_zeros_align(&p_hrr, anw, N); double *p_hqq; d_zeros_align(&p_hqq, anx, N+1); double *p_hff; d_zeros_align(&p_hff, anx, N+1); double *p_hxe; d_zeros_align(&p_hxe, anx, N+1); double *p_hxp; d_zeros_align(&p_hxp, anx, N+1); double *p_hw; d_zeros_align(&p_hw, anw, N); double *p_hy; d_zeros_align(&p_hy, any, N+1); double *p_hlam; d_zeros_align(&p_hlam, anx, N+1); double *(hq_res[N+1]); double *(hr_res[N]); double *(hf_res[N+1]); double *p_hq_res; d_zeros_align(&p_hq_res, anx, N+1); double *p_hr_res; d_zeros_align(&p_hr_res, anw, N); double *p_hf_res; d_zeros_align(&p_hf_res, anx, N+1); for(jj=0; jj<N; jj++) { hpA[jj] = pA; hpCA[jj] = pCA; hpG[jj] = pG; hpC[jj] = pC; hpR[jj] = pR; hpQ[jj] = pQ; d_zeros_align(&hpLp[jj], pnx, cnl); d_zeros_align(&hdLp[jj], anx, 1); d_zeros_align(&hpLp2[jj], pnz, cnl2); d_zeros_align(&hpLe[jj], pnz, cnf); hr[jj] = r; hq[jj] = q; hf[jj] = f; hpRG[jj] = pRG; hpQA[jj] = pQA; d_zeros_align(&hpGLr[jj], pnwx, cnw); d_zeros_align(&hpALe[jj], pnx2, cnx2); hrr[jj] = p_hrr+jj*anw; hqq[jj] = p_hqq+jj*anx; hff[jj] = p_hff+jj*anx; hxe[jj] = p_hxe+jj*anx; //d_zeros_align(&hxe[jj], anx, 1); hxp[jj] = p_hxp+jj*anx; //d_zeros_align(&hxp[jj], anx, 1); hw[jj] = p_hw+jj*anw; //d_zeros_align(&hw[jj], anw, 1); hy[jj] = p_hy+jj*any; //d_zeros_align(&hy[jj], any, 1); hlam[jj] = p_hlam+jj*anx; //d_zeros_align(&hlambda[jj], anx, 1); hq_res[jj] = p_hq_res+jj*anx; hr_res[jj] = p_hr_res+jj*anw; hf_res[jj] = p_hf_res+jj*anx; } hpC[N] = pC; hpQ[N] = pQ; d_zeros_align(&hpLp[N], pnx, cnl); d_zeros_align(&hdLp[N], anx, 1); d_zeros_align(&hpLp2[N], pnz, cnl2); d_zeros_align(&hpLe[N], pnz, cnf); hq[N] = q; // equality constraints on the states at the last stage double *D; d_zeros(&D, ndN, nx); for(ii=0; ii<ndN; ii++) D[ii*(ndN+1)] = 1; //D[0+ndN*0] = 1; //D[1+ndN*(nx-1)] = 1; double *d; d_zeros_align(&d, ndN, 1); for(ii=0; ii<ndN; ii++) d[ii] = ii; //d[0] = 1; //d[1] = 0; const int pnxdN = bs*((nx+ndN+bs-1)/bs); double *pCtQC; d_zeros_align(&pCtQC, pnxdN, cnx); d_cvt_mat2pmat(ny, ny, Q, ny, 0, pCtQC, cnx); d_cvt_mat2pmat(ndN, nx, D, ndN, nx, pCtQC+nx/bs*bs*cnx+nx%bs, cnx); //d_print_pmat(nx+ndN, nx, bs, pCtRC, cnx); hpQA[N] = pCtQC; // there is not A_N d_zeros_align(&hpALe[N], pnxdN, cnx2); // there is not A_N: pnx not pnx2 hqq[N] = p_hqq+N*anx; hff[N] = p_hff+N*anx; const int pndN = bs*((ndN+bs-1)/bs); const int cndN = ncl*((ndN+ncl-1)/ncl); double *Ld; d_zeros_align(&Ld, pndN, cndN); double *d_res; d_zeros_align(&d_res, pndN, 1); hxe[N] = p_hxe+N*anx; //d_zeros_align(&hxe[N], anx, 1); hxp[N] = p_hxp+N*anx; //d_zeros_align(&hxp[N], anx, 1); hy[N] = p_hy+N*any; //d_zeros_align(&hy[N], any, 1); hlam[N] = p_hlam+N*anx; //d_zeros_align(&hlambda[jj], anx, 1); hf_res[N] = p_hf_res+N*anx; hq_res[N] = p_hq_res+N*anx; // initialize hpLp[0] with the cholesky factorization of /Pi_p d_cvt_mat2pmat(nx, nx, L0, nx, 0, hpLp[0]+(nx+nw+pad)*bs, cnl); for(ii=0; ii<nx; ii++) hdLp[0][ii] = 1.0/L0[ii*(nx+1)]; d_cvt_mat2pmat(nx, nx, L0, nx, ny, hpLp2[0]+(ny/bs)*bs+ny%bs+(nx+pad2+ny)*bs, cnl2); dtrtr_l_lib(nx, ny, hpLp2[0]+(ny/bs)*bs*cnl2+ny%bs+(nx+pad2+ny)*bs, cnl2, hpLp2[0]+(nx+pad2+ncl)*bs, cnl2); //d_print_pmat(nx, cnl, bs, hpLp[0], cnl); //d_print_pmat(nz, cnl2, bs, hpLp2[0], cnl2); // buffer for L0 double *pL0; d_zeros_align(&pL0, pnx, cnx); d_cvt_mat2pmat(nx, nx, L0, nx, 0, pL0, cnx); // invert L0 in hpALe[0] dtrinv_lib(nx, pL0, cnx, hpALe[0], cnx2); double *pL0_inv; d_zeros_align(&pL0_inv, pnx, cnx); dtrinv_lib(nx, pL0, cnx, pL0_inv, cnx); //d_print_pmat(nx, nx, bs, pL0, cnx); //d_print_pmat(nx, nx, bs, pL0_inv, cnx); //d_print_pmat(pnx2, cnx2, bs, hpALe[0], cnx2); //exit(1); //double *work; d_zeros_align(&work, pny*cnx+pnz*cnz+anz+pnz*cnf+pnw*cnw, 1); double *work; d_zeros_align(&work, 2*pny*cnx+anz+pnw*cnw+pnx*cnx, 1); //printf("\nciao %d %d %d %d %d %d\n", pny, cnx, anz, pnw, cnw, pnx); double *work2; d_zeros_align(&work2, 2*pny*cnx+pnw*cnw+pnx*cnw+2*pnx*cnx+anz, 1); double *work3; d_zeros_align(&work3, pnx*cnl+anx, 1); double *work4; d_zeros_align(&work4, 4*anx+2*(anx+anw), 1); // for(jj=0; jj<2*pny*cnx+anz+pnw*cnw+pnx*cnx; jj++) // work[jj] = -100.0; // measurements for(jj=0; jj<=N; jj++) for(ii=0; ii<ny; ii++) hy[jj][ii] = yy[jj*ny+ii]; //d_print_mat(ny, N+1, hy[0], any); // initial guess for(ii=0; ii<nx; ii++) x0[ii] = 0.0; for(ii=0; ii<nx; ii++) hxp[0][ii] = x0[ii]; // information filter - solution double *y_temp; d_zeros_align(&y_temp, any, 1); for(ii=0; ii<N; ii++) for(jj=0; jj<nw; jj++) hrr[ii][jj] = r[jj]; for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) hff[ii][jj] = f[jj]; for(jj=0; jj<ndN; jj++) hff[N][jj] = d[jj]; for(ii=0; ii<=N; ii++) { for(jj=0; jj<ny; jj++) y_temp[jj] = - q[jj]; //d_print_mat(1, ny, y_temp, 1); dsymv_lib(ny, ny, hpQ[ii], cny, hy[ii], y_temp, y_temp, -1); //d_print_mat(1, ny, y_temp, 1); dgemv_t_lib(ny, nx, hpC[ii], cnx, y_temp, hqq[ii], hqq[ii], 0); //d_print_mat(1, nx, hqq[ii], 1); //if(ii==9) //exit(1); } //exit(1); /************************************************ * new low-level mhe_if interface ************************************************/ int nrows = pnx>pnw ? 2*pnx : pnx+pnw; int ncols = cnwx1; double *pQRAG; d_zeros_align(&pQRAG, nrows, ncols); if(nx>=nw) { d_cvt_mat2pmat(ny, ny, Q, ny, 0, pQRAG, cnwx1); d_cvt_mat2pmat(nx, nx, A, nx, 0, pQRAG+pnx*cnwx1, cnwx1); d_cvt_mat2pmat(nw, nw, R, nw, 0, pQRAG+(pnx-pnw)*cnwx1+nx*bs, cnwx1); d_cvt_mat2pmat(nx, nw, B, nx, 0, pQRAG+pnx*cnwx1+nx*bs, cnwx1); //d_print_pmat(nrows, ncols, bs, pQRAG, ncols); if(nx>pnx-nx) d_cvt_mat2pmat(pnx-nx, nx, A+(nx-pnx+nx), nx, nx, pQRAG+nx/bs*bs*cnwx1+nx%bs, cnwx1); else d_cvt_mat2pmat(nx, nx, A, nx, nx, pQRAG+nx/bs*bs*cnwx1+nx%bs, cnwx1); if(nx>pnw-nw) d_cvt_mat2pmat(pnw-nw, nw, B+(nx-pnw+nw), nx, nw, pQRAG+(pnx-pnw+nw/bs*bs)*cnwx1+nw%bs+nx*bs, cnwx1); else d_cvt_mat2pmat(nx, nw, B, nx, nw, pQRAG+(pnx-pnw+nw/bs*bs)*cnwx1+nw%bs+nx*bs, cnwx1); //d_print_pmat(nrows, ncols, bs, pQRAG, ncols); } else { d_cvt_mat2pmat(ny, ny, Q, ny, 0, pQRAG+(pnw-pnx)*cnwx1, cnwx1); d_cvt_mat2pmat(nx, nx, A, nx, 0, pQRAG+pnw*cnwx1, cnwx1); d_cvt_mat2pmat(nw, nw, R, nw, 0, pQRAG+nx*bs, cnwx1); d_cvt_mat2pmat(nx, nw, B, nx, 0, pQRAG+pnw*cnwx1+nx*bs, cnwx1); //d_print_pmat(nrows, ncols, bs, pQRAG, ncols); if(nx>pnx-nx) d_cvt_mat2pmat(pnx-nx, nx, A+(nx-pnx+nx), nx, nx, pQRAG+(pnw-pnx+nx/bs*bs)*cnwx1+nx%bs, cnwx1); else d_cvt_mat2pmat(nx, nx, A, nx, nx, pQRAG+(pnw-pnx+nx/bs*bs)*cnwx1+nx%bs, cnwx1); if(nx>pnw-nw) d_cvt_mat2pmat(pnw-nw, nw, B+(nx-pnw+nw), nx, nw, pQRAG+nw/bs*bs*cnwx1+nw%bs+nx*bs, cnwx1); else d_cvt_mat2pmat(nx, nw, B, nx, nw, pQRAG+nw/bs*bs*cnwx1+nw%bs+nx*bs, cnwx1); //d_print_pmat(nrows, ncols, bs, pQRAG, ncols); } double *pQD; d_zeros_align(&pQD, pnx+pndN, cnx); d_cvt_mat2pmat(ny, ny, Q, ny, 0, pQD, cnx); d_cvt_mat2pmat(ndN, nx, D, ndN, 0, pQD+pnx*cnx, cnx); //d_print_pmat(pnx+pndN, cnx, bs, pQD, cnx); if(ndN>pnx-nx) d_cvt_mat2pmat(pnx-nx, nx, D+(ndN-pnx+nx), ndN, nx, pQD+nx/bs*bs*cnx+nx%bs, cnx); else d_cvt_mat2pmat(ndN, nx, D, ndN, nx, pQD+nx/bs*bs*cnx+nx%bs, cnx); //d_print_pmat(pnx+pndN, cnx, bs, pQD, cnx); //exit(1); double *(hpQRAG[N+1]); double *(hpLAG[N+1]); double *(hpLe2[N+1]); for(ii=0; ii<N; ii++) { hpQRAG[ii] = pQRAG; d_zeros_align(&hpLAG[ii], nrows, ncols); d_zeros_align(&hpLe2[ii], pnx, cnx); } hpQRAG[N] = pQD; d_zeros_align(&hpLAG[N], pnx+pndN, cnx); d_zeros_align(&hpLe2[N], pnx, cnx); d_cvt_mat2pmat(nx, nx, L0, nx, 0, hpLe2[0], cnx); //d_print_pmat(nx, nx, bs, hpLe2[0], cnx); double **dummy; #if 0 struct timeval tv10, tv11, tv12; // double precision gettimeofday(&tv10, NULL); // start for(ii=0; ii<1; ii++) //for(ii=0; ii<nrep; ii++) { d_ric_trf_mhe_if(nx, nw, ndN, N, hpQRAG, diag_R, hpLe2, hpLAG, Ld, work3); //d_ric_trf_mhe_if(nx, nw, ndN, N, hpQA, hpRG, diag_R, hpALe, hpGLr, Ld, work3); } gettimeofday(&tv11, NULL); // stop for(ii=0; ii<1; ii++) //for(ii=0; ii<nrep; ii++) { d_ric_trs_mhe_if(nx, nw, ndN, N, hpLe2, hpLAG, Ld, hqq, hrr, hff, hxp, hxe, hw, hlam, work3); } gettimeofday(&tv12, NULL); // stop float time_trf_mhe_if_new = (float) (tv11.tv_sec-tv10.tv_sec)/(nrep+0.0)+(tv11.tv_usec-tv10.tv_usec)/(nrep*1e6); float time_trs_mhe_if_new = (float) (tv12.tv_sec-tv11.tv_sec)/(nrep+0.0)+(tv12.tv_usec-tv11.tv_usec)/(nrep*1e6); printf("\ntime = %e\t%e\n\n", time_trf_mhe_if_new, time_trs_mhe_if_new); //exit(1); #endif /************************************************ * reference code ************************************************/ double *(hA[N]); double *(hG[N]); double *(hQ[N+1]); double *(hR[N]); double *(hAGU[N]); double *(hUp[N+1]); double *(hUe[N+1]); double *(hUr[N]); double *Ud; double *work_ref; for(ii=0; ii<N; ii++) { hA[ii] = A; hG[ii] = B; hQ[ii] = Qx; hR[ii] = R; d_zeros(&hAGU[ii], nx, nx+nw); d_zeros(&hUp[ii], nx, nx); d_zeros(&hUe[ii], nx, nx); d_zeros(&hUr[ii], nw, nw); } hA[N] = D; hQ[N] = Qx; d_zeros(&hAGU[N], ndN, nx); d_zeros(&hUp[N], nx, nx); d_zeros(&hUe[N], nx, nx); d_zeros(&Ud, ndN, ndN); d_zeros(&work_ref, nx+nw, 1); for(ii=0; ii<nx*nx; ii++) hUp[0][ii] = L0[ii]; #if 0 printf("\nfactorization\n"); d_ric_trf_mhe_if_blas( nx, nw, ndN, N, hA, hG, hQ, hR, hAGU, hUp, hUe, hUr, Ud); printf("\nsolution\n"); d_ric_trs_mhe_if_blas( nx, nw, ndN, N, hAGU, hUp, hUe, hUr, Ud, hqq, hrr, hff, hxp, hxe, hw, hlam, work_ref); //d_print_mat(nx, nx, hUe[N], nx); //exit(1); #endif /************************************************ * high-level interface ************************************************/ #if 0 int kk; double *AA; d_zeros(&AA, nx, nx*N); //for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) for(ll=0; ll<nx; ll++) AA[ll+nx*jj+nx*nx*ii] = A[ll+nx*jj]; for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) for(kk=0; kk<nx; kk++) AA[jj+nx*kk+nx*nx*ii] = A[kk+nx*jj]; double *GG; d_zeros(&GG, nx, nw*N); //for(ii=0; ii<N; ii++) for(jj=0; jj<nw; jj++) for(ll=0; ll<nx; ll++) GG[ll+nx*jj+nx*nw*ii] = B[ll+nx*jj]; for(ii=0; ii<N; ii++) for(jj=0; jj<nw; jj++) for(kk=0; kk<nx; kk++) GG[jj+nw*kk+nx*nw*ii] = B[kk+nx*jj]; double *ff; d_zeros(&ff, nx, N); for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) ff[jj+nx*ii] = f[jj]; double *DD; d_zeros(&DD, ndN, nx); //for(jj=0; jj<nx; jj++) for(ll=0; ll<ndN; ll++) DD[ll+ndN*jj] = D[ll+ndN*jj]; for(jj=0; jj<nx; jj++) for(kk=0; kk<ndN; kk++) DD[jj+nx*kk] = D[kk+ndN*jj]; double *dd; d_zeros(&dd, ndN, 1); for(kk=0; kk<ndN; kk++) dd[kk] = d[kk]; double *RR; d_zeros(&RR, nw, nw*N); for(ii=0; ii<N; ii++) for(jj=0; jj<nw*nw; jj++) RR[jj+nw*nw*ii] = R[jj]; double *QQ; d_zeros(&QQ, nx, nx*N); for(ii=0; ii<N; ii++) { for(jj=0; jj<ny; jj++) for(kk=0; kk<ny; kk++) QQ[kk+nx*jj+nx*nx*ii] = Q[kk+ny*jj]; //for(jj=ny; jj<nx; jj++) QQ[jj+nx*jj+nx*nx*ii] = 1e-8; } double *Qf; d_zeros(&Qf, nx, nx); for(jj=0; jj<ny; jj++) for(kk=0; kk<ny; kk++) Qf[kk+nx*jj] = Q[kk+ny*jj]; double *rr; d_zeros(&rr, nw, N); for(ii=0; ii<N; ii++) for(jj=0; jj<nw; jj++) rr[jj+nw*ii] = r[jj]; double *qq; d_zeros(&qq, nx, N); for(ii=0; ii<N; ii++) for(jj=0; jj<ny; jj++) qq[jj+nx*ii] = q[jj]; double *yy_tmp; d_zeros_align(&yy_tmp, any, 1); for(ii=0; ii<N; ii++) { for(jj=0; jj<ny; jj++) yy_tmp[jj] = - q[jj]; dsymv_lib(ny, ny, hpQ[ii], cny, hy[ii], yy_tmp, -1); dgemv_t_lib(ny, nx, hpC[ii], cnx, yy_tmp, &qq[ii*nx], 0); } double *qf; d_zeros(&qf, nx, 1); // for(jj=0; jj<ny; jj++) qf[jj] = q[jj]; // if(ndN>0) // { for(jj=0; jj<ny; jj++) yy_tmp[jj] = - q[jj]; dsymv_lib(ny, ny, hpQ[N], cny, hy[N], yy_tmp, -1); dgemv_t_lib(ny, nx, hpC[N], cnx, yy_tmp, qf, 0); // } double *xx0; d_zeros(&xx0, nx, 1); double *LL0; d_zeros(&LL0, nx, nx); double *xxe; d_zeros(&xxe, nx, N+1); double *LLe; d_zeros(&LLe, nx, nx); double *ww; d_zeros(&ww, nw, N); double *llam; d_zeros(&llam, nx, N+1); double *work_high_level; d_zeros(&work_high_level, hpmpc_ric_mhe_if_dp_work_space(nx, nw, ny, ndN, N), 1); double *dummy0; struct timeval tv00, tv01; int error_code; printf("\nhigh-level\n"); // double precision gettimeofday(&tv00, NULL); // start for(ii=0; ii<nrep; ii++) { for(jj=0; jj<nx; jj++) xx0[jj] = x0[jj]; for(jj=0; jj<nx*nx; jj++) LL0[jj] = L0[jj]; //error_code = fortran_order_riccati_mhe_if( 'd', 2, nx, nw, 0, ndN, N, AA, GG, dummy, ff, DD, dd, RR, QQ, Qf, rr, qq, qf, dummy, xx0, LL0, xxe, LLe, ww, llam, work_high_level); error_code = c_order_riccati_mhe_if( 'd', 2, nx, nw, 0, ndN, N, AA, GG, dummy0, ff, DD, dd, RR, QQ, Qf, rr, qq, qf, dummy0, xx0, LL0, xxe, LLe, ww, llam, work_high_level); //if(error_code) // break; } gettimeofday(&tv01, NULL); // stop float time_mhe_if_high_level = (float) (tv01.tv_sec-tv00.tv_sec)/(nrep+0.0)+(tv01.tv_usec-tv00.tv_usec)/(nrep*1e6); printf("\nhigh-level interface for MHE_if\n\nerror_code: %d, time = %e\n\n", error_code, time_mhe_if_high_level); //d_print_mat(nx, N+1, xxe, nx); //d_print_mat(nw, N, ww, nw); free(AA); free(GG); free(ff); free(DD); free(dd); free(RR); free(QQ); free(Qf); free(rr); free(qq); free(qf); free(xx0); free(LL0); free(xxe); free(LLe); free(ww); free(llam); free(work_high_level); free(yy_tmp); //exit(1); #endif /************************************************ * call the solver ************************************************/ //d_print_mat(nx, nx, A, nx); //d_print_mat(nx, nw, B, nx); //d_ric_trf_mhe_test(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hpQ, hpR, hpLe, work); d_ric_trf_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpR, hpQ, hpLe, work); // estimation d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpR, hpQ, hpLe, hr, hq, hf, hxp, hxe, hw, hy, 0, hlam, work); #if 0 // print solution printf("\nx_e\n"); d_print_mat(nx, N+1, hxe[0], anx); #endif // smooth estimation d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpR, hpQ, hpLe, hr, hq, hf, hxp, hxe, hw, hy, 1, hlam, work); //d_print_pmat(nx, nx, bs, hpLp[N-1]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nx, nx, bs, hpLp[N]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nx, nx, bs, hpLe[N-1]+ncl*bs, cnf); //d_print_pmat(nx, nx, bs, hpLe[N]+ncl*bs, cnf); #if 1 printf("\nx_s\n"); //d_print_mat(nx, N+1, hxp[0], anx); d_print_mat(nw, N, hw[0], anw); d_print_mat(nx, N+1, hxe[0], anx); //d_print_mat(nx, N, hlam[0], anx); #endif // information filter - factorization //d_ric_trf_mhe_if(nx, nw, ndN, N, hpQA, hpRG, diag_R, hpALe, hpGLr, Ld, work3); d_ric_trf_mhe_if(nx, nw, ndN, N, hpQRAG, diag_R, hpLe2, hpLAG, Ld, work3); // information filter - solution //d_ric_trs_mhe_if(nx, nw, ndN, N, hpALe, hpGLr, Ld, hqq, hrr, hff, hxp, hxe, hw, hlam, work3); d_ric_trs_mhe_if(nx, nw, ndN, N, hpLe2, hpLAG, Ld, hqq, hrr, hff, hxp, hxe, hw, hlam, work3); //d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpQ, hpR, hpLe, hq, hr, hf, hxp, hxe, hw, hy, 1, hlam, work); //d_print_pmat(nx, nx, bs, hpALe[N-1], cnx2); //d_print_pmat(nx, nx, bs, hpALe[N], cnx2); //d_print_pmat(nx, nx, bs, hpALe[N-2]+cnx*bs, cnx2); //d_print_pmat(nx, nx, bs, hpALe[N-1]+cnx*bs, cnx2); //d_print_pmat(nx, nx, bs, hpALe[N]+cnx*bs, cnx2); //d_print_pmat(nx, nx, bs, hpRA[N], cnx); #if 1 printf("\nx_s_if\n"); //d_print_mat(nx, N+1, hxp[0], anx); d_print_mat(nw, N, hw[0], anw); d_print_mat(nx, N+1, hxe[0], anx); //d_print_mat(nx, N, hlam[0], anx); //exit(1); #endif //d_print_pmat(nw, nw, bs, hpQ[0], cnw); //d_print_pmat(nx, nw, bs, hpG[0], cnw); //d_print_mat(nw, 1, hq[0], nw); //d_print_mat(nw, 1, hw[0], nw); //d_print_mat(nx, 1, hlam[0], nx); //exit(3); #if 1 int nZ = nw+nx+1; int pnZ = (nw+nx+1+bs-1)/bs*bs; int cnZ = (nw+nx+1+ncl-1)/ncl*ncl; int cnL = cnZ>cnx+ncl ? cnZ : cnx+ncl; double *(hpRSQrq[N+1]); for(ii=0; ii<=N; ii++) { d_zeros_align(&hpRSQrq[ii], pnZ, cnZ); d_cvt_mat2pmat(nw, nw, R, nw, 0, hpRSQrq[ii], cnZ); d_cvt_mat2pmat(ny, ny, Q, ny, nw, hpRSQrq[ii]+nw/bs*bs*cnZ+nw%bs+nw*bs, cnZ); d_cvt_mat2pmat(1, nw, r, 1, nw+nx, hpRSQrq[ii]+(nw+nx)/bs*bs*cnZ+(nw+nx)%bs, cnZ); d_cvt_mat2pmat(1, nx, hqq[ii], 1, nw+nx, hpRSQrq[ii]+(nw+nx)/bs*bs*cnZ+(nw+nx)%bs+nw*bs, cnZ); //d_print_pmat(nZ, nZ, bs, hpRSQrq[ii], cnZ); } double *pP0; d_zeros_align(&pP0, pnx, cnx); d_cvt_mat2pmat(nx, nx, L0, nx, 0, pP0, cnx); //d_print_pmat(nx, nx, bs, pP0, cnx); dgead_lib(nx, nx, 1.0, 0, pP0, cnx, nw, hpRSQrq[0]+nw/bs*bs*cnZ+nw%bs+nw*bs, cnZ); //d_print_pmat(nZ, nZ, bs, hpRSQrq[0], cnZ); double *pBAbt; d_zeros_align(&pBAbt, pnZ, cnx); d_cvt_tran_mat2pmat(nx, nw, B, nx, 0, pBAbt, cnx); d_cvt_tran_mat2pmat(nx, nx, A, nx, nw, pBAbt+nw/bs*bs*cnx+nw%bs, cnx); d_cvt_mat2pmat(1, nx, f, 1, nw+nx, pBAbt+(nw+nx)/bs*bs*cnx+(nw+nx)%bs, cnx); //d_print_pmat(nZ, nx, bs, pBAbt, cnx); double *(hpBAbt[N]); for(ii=0; ii<N; ii++) { hpBAbt[ii] = pBAbt; } double *(hpLam[N+1]); for(ii=0; ii<=N; ii++) { d_zeros_align(&hpLam[ii], pnZ, cnL); } double *work_ric; d_zeros_align(&work_ric, pnZ, cnx); double *diag_ric; d_zeros_align(&diag_ric, pnZ, 1); double *hux_mat; d_zeros_align(&hux_mat, pnZ, N+1); double *(hux[N+1]); for(ii=0; ii<=N; ii++) { hux[ii] = hux_mat+ii*pnZ; } double **pdummy; d_back_ric_sv(N, nx, nw, hpBAbt, hpRSQrq, 0, pdummy, pdummy, 0, hux, hpLam, work_ric, diag_ric, 0, pdummy, 0, pdummy, 0, 0, 0, pdummy, pdummy, pdummy); d_print_mat(nw, N+1, hux_mat, pnZ); d_print_mat(nx, N+1, hux_mat+nw, pnZ); exit(1); #endif // compute residuals double *p0; d_zeros_align(&p0, anx, 1); double *x_temp; d_zeros_align(&x_temp, anx, 1); dtrmv_u_t_lib(nx, pL0_inv, cnx, x0, x_temp, 0); dtrmv_u_n_lib(nx, pL0_inv, cnx, x_temp, p0, 0); d_res_mhe_if(nx, nw, ndN, N, hpQA, hpRG, pL0_inv, hqq, hrr, hff, p0, hxe, hw, hlam, hq_res, hr_res, hf_res, work4); // printf("\nprint residuals\n\n"); // d_print_mat(nx, N+1, hq_res[0], anx); // d_print_mat(nw, N, hr_res[0], anw); // d_print_mat(nx, N, hf_res[0], anx); // d_print_mat(ndN, 1, hf_res[0]+N*anx, anx); //return 0; //exit(1); if(0 && PRINTRES) { // print solution printf("\nx_p\n"); d_print_mat(nx, N+1, hxp[0], anx); printf("\nx_s\n"); d_print_mat(nx, N+1, hxe[0], anx); printf("\nw\n"); d_print_mat(nw, N+1, hw[0], anw); //printf("\nL_p\n"); //d_print_pmat(nx, nx, bs, hpLp[0]+(nx+nw+pad)*bs, cnl); //d_print_mat(1, nx, hdLp[0], 1); //d_print_pmat(nx, nx, bs, hpLp[1]+(nx+nw+pad)*bs, cnl); //d_print_mat(1, nx, hdLp[1], 1); //d_print_pmat(nx, nx, bs, hpLp[2]+(nx+nw+pad)*bs, cnl); //d_print_mat(1, nx, hdLp[2], 1); //d_print_pmat(nx, nx, bs, hpLp[N]+(nx+nw+pad)*bs, cnl); //d_print_mat(1, nx, hdLp[N], 1); //printf("\nL_p\n"); //d_print_pmat(nz, nz, bs, hpLp2[0]+(nx+pad2)*bs, cnl2); //d_print_pmat(nz, nz, bs, hpLp2[1]+(nx+pad2)*bs, cnl2); //d_print_pmat(nz, nz, bs, hpLp2[2]+(nx+pad2)*bs, cnl2); //printf("\nL_e\n"); //d_print_pmat(nz, nz, bs, hpLe[0], cnf); //d_print_pmat(nz, nz, bs, hpLe[1], cnf); //d_print_pmat(nz, nz, bs, hpLe[2], cnf); //d_print_pmat(nx, nx, bs, hpA[0], cnx); } // timing struct timeval tv0, tv1, tv2, tv3, tv4, tv5, tv6, tv7, tv8; // double precision gettimeofday(&tv0, NULL); // start // factorize for(rep=0; rep<nrep; rep++) { //d_ric_trf_mhe_test(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hpQ, hpR, hpLe, work); d_ric_trf_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpR, hpQ, hpLe, work); } gettimeofday(&tv1, NULL); // start // solve for(rep=0; rep<nrep; rep++) { d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpR, hpQ, hpLe, hr, hq, hf, hxp, hxe, hw, hy, 1, hlam, work); } gettimeofday(&tv2, NULL); // start // factorize for(rep=0; rep<nrep; rep++) { //d_print_pmat(nx, nx, bs, hpLe[N]+(ncl)*bs, cnf); //d_print_pmat(nx, nx, bs, hpLp[N]+(nx+nw+pad)*bs, cnl); //d_ric_trf_mhe_test(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hpQ, hpR, hpLe, work); d_ric_trf_mhe_end(nx, nw, ny, N, hpCA, hpG, hpC, hpLp2, hpR, hpQ, hpLe, work2); } gettimeofday(&tv3, NULL); // start // solve for(rep=0; rep<nrep; rep++) { d_ric_trs_mhe_end(nx, nw, ny, N, hpA, hpG, hpC, hpLp2, hpR, hpQ, hpLe, hr, hq, hf, hxp, hxe, hy, work2); } gettimeofday(&tv4, NULL); // start // factorize information filter for(rep=0; rep<nrep; rep++) { //d_ric_trf_mhe_if(nx, nw, ndN, N, hpQA, hpRG, diag_R, hpALe, hpGLr, Ld, work3); d_ric_trf_mhe_if(nx, nw, ndN, N, hpQRAG, diag_R, hpLe2, hpLAG, Ld, work3); } gettimeofday(&tv5, NULL); // start // factorize information filter for(rep=0; rep<nrep; rep++) { //d_ric_trs_mhe_if(nx, nw, ndN, N, hpALe, hpGLr, Ld, hqq, hrr, hff, hxp, hxe, hw, hlam, work3); d_ric_trs_mhe_if(nx, nw, ndN, N, hpLe2, hpLAG, Ld, hqq, hrr, hff, hxp, hxe, hw, hlam, work3); } gettimeofday(&tv6, NULL); // start // factorize information filter for(rep=0; rep<nrep; rep++) { #if defined(REF_BLAS_OPENBLAS) || defined(REF_BLAS_BLIS) || defined(REF_BLAS_NETLIB) //d_ric_trf_mhe_if_blas( nx, nw, ndN, N, hA, hG, hQ, hR, hAGU, hUp, hUe, hUr); d_ric_trf_mhe_if_blas( nx, nw, ndN, N, hA, hG, hQ, hR, hAGU, hUp, hUe, hUr, Ud); #endif } gettimeofday(&tv7, NULL); // start // solution information filter for(rep=0; rep<nrep; rep++) { #if defined(REF_BLAS_OPENBLAS) || defined(REF_BLAS_BLIS) || defined(REF_BLAS_NETLIB) d_ric_trs_mhe_if_blas( nx, nw, ndN, N, hAGU, hUp, hUe, hUr, Ud, hqq, hrr, hff, hxp, hxe, hw, hlam, work_ref); #endif } gettimeofday(&tv8, NULL); // start float Gflops_max = flops_max * GHz_max; float time_trf = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); float time_trs = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6); float time_trf_end = (float) (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6); float time_trs_end = (float) (tv4.tv_sec-tv3.tv_sec)/(nrep+0.0)+(tv4.tv_usec-tv3.tv_usec)/(nrep*1e6); float time_trf_if = (float) (tv5.tv_sec-tv4.tv_sec)/(nrep+0.0)+(tv5.tv_usec-tv4.tv_usec)/(nrep*1e6); float time_trs_if = (float) (tv6.tv_sec-tv5.tv_sec)/(nrep+0.0)+(tv6.tv_usec-tv5.tv_usec)/(nrep*1e6); float time_trf_if_blas = (float) (tv7.tv_sec-tv6.tv_sec)/(nrep+0.0)+(tv7.tv_usec-tv6.tv_usec)/(nrep*1e6); float time_trs_if_blas = (float) (tv8.tv_sec-tv7.tv_sec)/(nrep+0.0)+(tv8.tv_usec-tv7.tv_usec)/(nrep*1e6); float flop_trf_if = N*(10.0/3.0*nx*nx*nx+nx*nx*nw)+2.0/3.0*nx*nx*nx+ndN*nx*nx+ndN*ndN*nx+1.0/3.0*ndN*ndN*ndN; if(diag_R==0) flop_trf_if += N*(nx*nw*nw+1.0/3.0*nw*nw*nw); else flop_trf_if += N*(nx*nw+1.0/2.0*nw*nw); float Gflops_trf_if = flop_trf_if*1e-9/time_trf_if; float Gflops_trf_if_blas = flop_trf_if*1e-9/time_trf_if_blas; if(ll==0) { printf("\nnx\tnw\tny\tN\ttrf time\ttrs time\ttrf_e time\ttrs_e time\ttrf_if time\ttrf_if Gflops\ttrf_if percent\ttrs_if time\ttrf_if BLAS\tGflops\t\tpercent\t\ttrs_if BLAS\n\n"); // fprintf(f, "\nnx\tnu\tN\tsv time\t\tsv Gflops\tsv %%\t\ttrs time\ttrs Gflops\ttrs %%\n\n"); } printf("%d\t%d\t%d\t%d\t%e\t%e\t%e\t%e\t%e\t%f\t%f\t%e\t%e\t%f\t%f\t%e\n", nx, nw, ny, N, time_trf, time_trs, time_trf_end, time_trs_end, time_trf_if, Gflops_trf_if, 100*Gflops_trf_if/Gflops_max, time_trs_if, time_trf_if_blas, Gflops_trf_if_blas, 100*Gflops_trf_if_blas/Gflops_max, time_trs_if_blas); #if 0 return 0; // moving horizon test // window size N = 20; double *(hhxe[N+1]); double *(hhxp[N+1]); double *(hhw[N]); double *(hhy[N+1]); double *(hhlam[N]); double *p_hhxe; d_zeros_align(&p_hhxe, anx, N+1); double *p_hhxp; d_zeros_align(&p_hhxp, anx, N+1); double *p_hhw; d_zeros_align(&p_hhw, anw, N); double *p_hhlam; d_zeros_align(&p_hhlam, anx, N); // shift measurements and initial prediction for(ii=0; ii<N; ii++) { hhxe[ii] = p_hhxe+ii*anx; //d_zeros_align(&hxe[jj], anx, 1); hhxp[ii] = p_hhxp+ii*anx; //d_zeros_align(&hxp[jj], anx, 1); hhw[ii] = p_hhw+ii*anw; //d_zeros_align(&hw[jj], anw, 1); hhy[ii] = hy[ii]; //d_zeros_align(&hy[jj], any, 1); hhlam[ii] = p_hhlam+ii*anx; //d_zeros_align(&hlam[jj], anx, 1); } hhxe[N] = p_hhxe+N*anx; //d_zeros_align(&hxe[jj], anx, 1); hhxp[N] = p_hhxp+N*anx; //d_zeros_align(&hxp[jj], anx, 1); hhy[N] = hy[N]; //d_zeros_align(&hy[jj], any, 1); // shift initial prediction covariance //for(ii=0; ii<pnx*cnl; ii++) // hpLp[0][ii] = hpLp[1][ii]; d_ric_trf_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpQ, hpR, hpLe, work); d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpQ, hpR, hpLe, hq, hr, hf, hhxp, hhxe, hhw, hhy, 1, hhlam, work); // zero data for(ii=0; ii<Ns*anx; ii++) hxe[0][ii] = 0.0; for(ii=anx; ii<Ns*anx; ii++) hxp[0][ii] = 0.0; for(ii=0; ii<(Ns-1)*anw; ii++) hw[0][ii] = 0.0; for(ii=0; ii<(Ns-1)*anx; ii++) hlam[0][ii] = 0.0; // save data for(ii=0; ii<(N+1); ii++) for(jj=0; jj<nx; jj++) hxe[ii][jj] = hhxe[ii][jj]; for(ii=0; ii<(N+1); ii++) for(jj=0; jj<nx; jj++) hxp[ii][jj] = hhxp[ii][jj]; for(ii=0; ii<N; ii++) for(jj=0; jj<nw; jj++) hw[ii][jj] = hhw[ii][jj]; //d_print_mat(nw, N, hw[0], anw); for(ii=0; ii<N; ii++) for(jj=0; jj<nx; jj++) hlam[ii][jj] = hhlam[ii][jj]; for(jj=1; jj<Ns-N; jj++) { //break; // shift measurements and initial prediction for(ii=0; ii<=N; ii++) { hhy[ii] = hy[ii+jj]; } // shift initial prediction and relative covariance for(ii=0; ii<nx; ii++) hhxp[0][ii] = hhxp[1][ii]; for(ii=0; ii<pnx*cnl; ii++) hpLp[0][ii] = hpLp[1][ii]; //d_print_mat(nx, N+1, hhxp[0], anx); //d_print_pmat(nx, nx, bs, hpLp[1]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nz, nz, bs, hpLe[1], cnf); //d_print_pmat(nx, nx, bs, hpLp[2]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nz, nz, bs, hpLe[2], cnf); d_ric_trf_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpQ, hpR, hpLe, work); d_ric_trs_mhe(nx, nw, ny, N, hpA, hpG, hpC, hpLp, hdLp, hpQ, hpR, hpLe, hq, hr, hf, hhxp, hhxe, hhw, hhy, 1, hhlam, work); //d_print_mat(nx, N+1, hhxp[0], anx); //d_print_pmat(nx, nx, bs, hpLp[0]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nz, nz, bs, hpLe[0], cnf); //d_print_pmat(nx, nx, bs, hpLp[1]+(nx+nw+pad)*bs, cnl); //d_print_pmat(nz, nz, bs, hpLe[1], cnf); // save data for(ii=0; ii<nx; ii++) hxe[N+jj][ii] = hhxe[N][ii]; for(ii=0; ii<nx; ii++) hxp[N+jj][ii] = hhxp[N][ii]; if(jj<Ns-N-1) for(ii=0; ii<nw; ii++) hw[N+jj][ii] = hhw[N-1][ii]; if(jj<Ns-N-1) for(ii=0; ii<nx; ii++) hlam[N+jj][ii] = hhlam[N-1][ii]; //break; } // print solution if(PRINTRES) { printf("\nx_p\n"); d_print_mat(nx, Ns, hxp[0], anx); printf("\nx_e\n"); d_print_mat(nx, Ns, hxe[0], anx); //printf("\nL_e\n"); //d_print_pmat(nx, nx, bs, hpLp[Ns-1]+(nx+nw+pad)*bs, cnl); } #endif /************************************************ * return ************************************************/ free(A); free(B); free(C); free(b); free(D); free(d); free(x0); free(Q); free(Qx); free(R); free(q); free(r); free(f); free(L0); free(pA); free(pG); free(pC); free(pQ); free(pR); free(pQA); free(pRG); free(work); free(work2); free(work3); free(work4); free(p_hxe); free(p_hxp); free(p_hy); free(p_hw); free(p_hlam); //free(p_hhxe); //free(p_hhxp); //free(p_hhw); //free(p_hhlam); free(x_temp); free(y_temp); free(p0); free(p_hr_res); free(p_hq_res); free(p_hf_res); free(pL0_inv); free(hpLp[0]); free(hdLp[0]); free(hpLe[0]); for(jj=0; jj<N; jj++) { free(hpLp[jj+1]); free(hdLp[jj+1]); free(hpLe[jj+1]); free(hpGLr[jj]); free(hpALe[jj]); free(hpLp2[jj]); } free(hpALe[N]); free(pQRAG); free(pQD); for(ii=0; ii<N; ii++) { free(hpLAG[ii]); free(hpLe2[ii]); } free(hpLAG[N]); free(hpLe2[N]); for(ii=0; ii<N; ii++) { free(hAGU[ii]); free(hUp[ii]); free(hUe[ii]); free(hUr[ii]); } free(hUp[N]); free(hUe[N]); free(Ud); free(work_ref); } // increase size fprintf(f, "];\n"); fclose(f); return 0; }
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif int ii, jj; int rep, nrep=1000; //000;//NREP; int nx_ = 8;//NX; // number of states (it has to be even for the mass-spring system test problem) int nu_ = 3;//NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = 10;//NN; // horizon lenght // int nb = nu+nx; // number of box constrained inputs and states // int ng = nx; //4; // number of general constraints // int ngN = nx; // number of general constraints at the last stage printf("\nN = %d, nx = %d, nu = %d\n\n", N, nx_, nu_); #define MHE 0 // int nbu = nu<nb ? nu : nb ; // int nbx = nb-nu>0 ? nb-nu : 0; // stage-wise variant size int nx[N+1]; #if MHE==1 nx[0] = nx_; #else nx[0] = 0; #endif for(ii=1; ii<=N; ii++) nx[ii] = nx_; int nu[N+1]; for(ii=0; ii<N; ii++) nu[ii] = nu_; nu[N] = 0; // XXX int nb[N+1]; nb[0] = nu[0] + nx[0]/2; for(ii=1; ii<N; ii++) nb[ii] = nu[1] + nx[ii]/2; nb[N] = nu[N] + nx[N]/2; int ng[N+1]; for(ii=0; ii<N; ii++) ng[ii] = 0; //ng; ng[N] = 0; //ngN; // ng[M] = nx_; // XXX /************************************************ * IPM common arguments ************************************************/ int hpmpc_status; int kk = -1; int k_max = 10; double mu0 = 2.0; double mu_tol = 1e-20; double alpha_min = 1e-8; int warm_start = 0; // read initial guess from x and u double *stat; d_zeros(&stat, k_max, 5); int compute_res = 1; int compute_mult = 1; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx_, nx_); // states update matrix double *B; d_zeros(&B, nx_, nu_); // inputs matrix double *b; d_zeros_align(&b, nx_, 1); // states offset double *x0; d_zeros_align(&x0, nx_, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx_, nu_, N, A, B, b, x0); for(jj=0; jj<nx_; jj++) b[jj] = 0.1; for(jj=0; jj<nx_; jj++) x0[jj] = 0; x0[0] = 2.5; x0[1] = 2.5; #if MHE!=1 struct blasfeo_dvec sx0; blasfeo_allocate_dvec(nx_, &sx0); blasfeo_pack_dvec(nx_, x0, &sx0, 0); struct blasfeo_dvec sb; blasfeo_allocate_dvec(nx_, &sb); blasfeo_pack_dvec(nx_, b, &sb, 0); struct blasfeo_dmat sA; blasfeo_allocate_dmat(nx_, nx_, &sA); blasfeo_pack_dmat(nx_, nx_, A, nx_, &sA, 0, 0); struct blasfeo_dvec sb0; blasfeo_allocate_dvec(nx_, &sb0); blasfeo_dgemv_n(nx_, nx_, 1.0, &sA, 0, 0, &sx0, 0, 1.0, &sb, 0, &sb0, 0); struct blasfeo_dmat sBAbt0; blasfeo_allocate_dmat(nu[0]+1, nx[1], &sBAbt0); blasfeo_pack_tran_dmat(nx_, nu_, B, nx_, &sBAbt0, 0, 0); blasfeo_drowin(nx[1], 1.0, &sb0, 0, &sBAbt0, nu[0], 0); // d_print_strmat(nu[0]+1, nx[1], &sBAbt0, 0, 0); #endif struct blasfeo_dmat sBAbt1; if(N>1) { blasfeo_allocate_dmat(nu[1]+nx[1]+1, nx[2], &sBAbt1); blasfeo_pack_tran_dmat(nx_, nu_, B, nx_, &sBAbt1, 0, 0); blasfeo_pack_tran_dmat(nx_, nx_, A, nx_, &sBAbt1, nu[1], 0); blasfeo_pack_tran_dmat(nx_, 1, b, nx_, &sBAbt1, nu[1]+nx[1], 0); // d_print_strmat(nu[1]+nx[1]+1, nx[2], &sBAbt1, 0, 0); } /************************************************ * cost function ************************************************/ double *R; d_zeros(&R, nu_, nu_); for(ii=0; ii<nu_; ii++) R[ii*(nu_+1)] = 2.0; double *S; d_zeros(&S, nu_, nx_); double *Q; d_zeros(&Q, nx_, nx_); for(ii=0; ii<nx_; ii++) Q[ii*(nx_+1)] = 1.0; double *r; d_zeros(&r, nu_, 1); for(ii=0; ii<nu_; ii++) r[ii] = 0.2; double *q; d_zeros(&q, nx_, 1); for(ii=0; ii<nx_; ii++) q[ii] = 0.1; #if MHE!=1 struct blasfeo_dvec sr; blasfeo_allocate_dvec(nu_, &sr); blasfeo_pack_dvec(nu_, r, &sr, 0); struct blasfeo_dmat sS; blasfeo_allocate_dmat(nu_, nx_, &sS); blasfeo_pack_dmat(nu_, nx_, S, nu_, &sS, 0, 0); struct blasfeo_dvec sr0; blasfeo_allocate_dvec(nu_, &sr0); blasfeo_dgemv_n(nu_, nx_, 1.0, &sS, 0, 0, &sx0, 0, 1.0, &sr, 0, &sr0, 0); struct blasfeo_dmat sRSQrq0; blasfeo_allocate_dmat(nu[0]+nx[0]+1, nu[0]+nx[0], &sRSQrq0); blasfeo_pack_dmat(nu_, nu_, R, nu_, &sRSQrq0, 0, 0); blasfeo_drowin(nu[0], 1.0, &sr0, 0, &sRSQrq0, nu[0], 0); // d_print_strmat(nu[0]+nx[0]+1, nu[0]+nx[0], &sRSQrq0, 0, 0); struct blasfeo_dvec srq0; blasfeo_allocate_dvec(nu[0]+nx[0], &srq0); blasfeo_dveccp(nu[0], 1.0, &sr0, 0, &srq0, 0); #endif struct blasfeo_dmat sRSQrq1; struct blasfeo_dvec srq1; if(N>1) { blasfeo_allocate_dmat(nu[1]+nx[1]+1, nu[1]+nx[1], &sRSQrq1); blasfeo_pack_dmat(nu_, nu_, R, nu_, &sRSQrq1, 0, 0); blasfeo_pack_tran_dmat(nu_, nx_, S, nu_, &sRSQrq1, nu[1], 0); blasfeo_pack_dmat(nx_, nx_, Q, nx_, &sRSQrq1, nu[1], nu[1]); blasfeo_pack_tran_dmat(nu_, 1, r, nu_, &sRSQrq1, nu[1]+nx[1], 0); blasfeo_pack_tran_dmat(nx_, 1, q, nx_, &sRSQrq1, nu[1]+nx[1], nu[1]); // d_print_strmat(nu[1]+nx[1]+1, nu[1]+nx[1], &sRSQrq1, 0, 0); blasfeo_allocate_dvec(nu[1]+nx[1], &srq1); blasfeo_pack_dvec(nu_, r, &srq1, 0); blasfeo_pack_dvec(nx_, q, &srq1, nu[1]); } struct blasfeo_dmat sRSQrqN; blasfeo_allocate_dmat(nx[N]+1, nx[N], &sRSQrqN); blasfeo_pack_dmat(nx_, nx_, Q, nx_, &sRSQrqN, 0, 0); blasfeo_pack_tran_dmat(nx_, 1, q, nx_, &sRSQrqN, nx[1], 0); // d_print_strmat(nu[N]+nx[N]+1, nu[N]+nx[N], &sRSQrqN, 0, 0); struct blasfeo_dvec srqN; blasfeo_allocate_dvec(nx[N], &srqN); blasfeo_pack_dvec(nx_, q, &srqN, 0); /************************************************ * constraints ************************************************/ #if MHE!=1 double *d0; d_zeros(&d0, 2*nb[0]+2*ng[0], 1); int *idxb0; int_zeros(&idxb0, nb[0], 1); // inputs for(ii=0; ii<nu[0]; ii++) { d0[ii] = - 0.5; // u_min d0[nb[0]+ng[0]+ii] = + 0.5; // u_max idxb0[ii] = ii; } // states for( ; ii<nb[0]; ii++) { d0[ii] = - 4.0; // x_min d0[nb[0]+ng[0]+ii] = + 4.0; // x_max idxb0[ii] = ii; } #endif double *d1; int *idxb1; if(N>1) { d_zeros(&d1, 2*nb[1]+2*ng[1], 1); int_zeros(&idxb1, nb[1], 1); // inputs for(ii=0; ii<nu[1]; ii++) { d1[ii] = - 0.5; // u_min d1[nb[1]+ng[1]+ii] = + 0.5; // u_max idxb1[ii] = ii; } // states for( ; ii<nb[1]; ii++) { d1[ii] = - 4.0; // x_min d1[nb[1]+ng[1]+ii] = + 4.0; // x_max idxb1[ii] = ii; } } double *dN; d_zeros(&dN, 2*nb[N]+2*ng[N], 1); int *idxbN; int_zeros(&idxbN, nb[N], 1); // no inputs // states for(ii=0 ; ii<nb[N]; ii++) { dN[ii] = - 4.0; // x_min dN[nb[N]+ng[N]+ii] = + 4.0; // x_max idxbN[ii] = ii; } struct blasfeo_dvec sd0; blasfeo_allocate_dvec(2*nb[0]+2*ng[0], &sd0); blasfeo_pack_dvec(2*nb[0]+2*ng[0], d0, &sd0, 0); // blasfeo_print_tran_dvec(2*nb[0], &sd0, 0); struct blasfeo_dvec sd1; blasfeo_allocate_dvec(2*nb[1]+2*ng[1], &sd1); blasfeo_pack_dvec(2*nb[1]+2*ng[1], d1, &sd1, 0); // blasfeo_print_tran_dvec(2*nb[1], &sd1, 0); struct blasfeo_dvec sdN; blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &sdN); blasfeo_pack_dvec(2*nb[N]+2*ng[N], dN, &sdN, 0); // blasfeo_print_tran_dvec(2*nb[N], &sdN, 0); /************************************************ * array of data matrices ************************************************/ // original MPC struct blasfeo_dmat hsBAbt[N]; struct blasfeo_dvec hsb[N]; struct blasfeo_dmat hsRSQrq[N+1]; struct blasfeo_dvec hsrq[N+1]; struct blasfeo_dmat hsDCt[N+1]; // XXX struct blasfeo_dvec hsd[N+1]; int *hidxb[N+1]; ii = 0; #if MHE!=1 hsBAbt[ii] = sBAbt0; hsb[ii] = sb0; hsRSQrq[ii] = sRSQrq0; hsrq[ii] = srq0; hsd[ii] = sd0; hidxb[0] = idxb0; #else hsBAbt[ii] = sBAbt1; hsb[ii] = sb; hsRSQrq[ii] = sRSQrq1; hsrq[ii] = srq1; hsd[ii] = sd1; hidxb[0] = idxb1; #endif for(ii=1; ii<N; ii++) { hsBAbt[ii] = sBAbt1; hsb[ii] = sb; hsRSQrq[ii] = sRSQrq1; hsrq[ii] = srq1; hsd[ii] = sd1; hidxb[ii] = idxb1; } hsRSQrq[ii] = sRSQrqN; hsrq[ii] = srqN; hsd[ii] = sdN; hidxb[N] = idxbN; /************************************************ * solve full spase system using Riccati / IPM ************************************************/ // result vectors struct blasfeo_dvec hsux[N+1]; struct blasfeo_dvec hspi[N+1]; struct blasfeo_dvec hslam[N+1]; struct blasfeo_dvec hst[N+1]; for(ii=0; ii<=N; ii++) { blasfeo_allocate_dvec(nu[ii]+nx[ii], &hsux[ii]); blasfeo_allocate_dvec(nx[ii], &hspi[ii]); blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hslam[ii]); blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hst[ii]); } // work space void *work_space_ipm; v_zeros_align(&work_space_ipm, d_ip2_res_mpc_hard_work_space_size_bytes_libstr(N, nx, nu, nb, ng)); struct timeval tv0, tv1; printf("\nsolving... (full space system)\n"); gettimeofday(&tv0, NULL); // stop for(rep=0; rep<nrep; rep++) { hpmpc_status = d_ip2_res_mpc_hard_libstr(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, hsux, 1, hspi, hslam, hst, work_space_ipm); } gettimeofday(&tv1, NULL); // stop printf("\n... done\n"); float time_ipm_full = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); printf("\nux =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(nu[ii]+nx[ii], &hsux[ii], 0); printf("\npi =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(nx[ii], &hspi[ii], 0); printf("\nlam =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hslam[ii], 0); printf("\nt =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hst[ii], 0); // residuals vectors struct blasfeo_dvec hsrrq[N+1]; struct blasfeo_dvec hsrb[N+1]; struct blasfeo_dvec hsrd[N+1]; struct blasfeo_dvec hsrm[N+1]; double mu; for(ii=0; ii<N; ii++) { blasfeo_allocate_dvec(nu[ii]+nx[ii], &hsrrq[ii]); blasfeo_allocate_dvec(nx[ii+1], &hsrb[ii]); blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hsrd[ii]); blasfeo_allocate_dvec(2*nb[ii]+2*ng[ii], &hsrm[ii]); } blasfeo_allocate_dvec(nu[N]+nx[N], &hsrrq[N]); blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &hsrd[N]); blasfeo_allocate_dvec(2*nb[N]+2*ng[N], &hsrm[N]); int ngM = ng[0]; for(ii=1; ii<=N; ii++) { ngM = ng[ii]>ngM ? ng[ii] : ngM; } void *work_space_res; v_zeros_align(&work_space_res, d_res_res_mpc_hard_work_space_size_bytes_libstr(N, nx, nu, nb, ng)); d_res_res_mpc_hard_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsb, hsRSQrq, hsrq, hsux, hsDCt, hsd, hspi, hslam, hst, hsrrq, hsrb, hsrd, hsrm, &mu, work_space_res); printf("\nres_rq\n"); for(ii=0; ii<=N; ii++) blasfeo_print_exp_tran_dvec(nu[ii]+nx[ii], &hsrrq[ii], 0); printf("\nres_b\n"); for(ii=0; ii<N; ii++) blasfeo_print_exp_tran_dvec(nx[ii+1], &hsrb[ii], 0); printf("\nres_d\n"); for(ii=0; ii<=N; ii++) blasfeo_print_exp_tran_dvec(2*nb[ii]+2*ng[ii], &hsrd[ii], 0); printf("\nres_m\n"); for(ii=0; ii<=N; ii++) blasfeo_print_exp_tran_dvec(2*nb[ii]+2*ng[ii], &hsrm[ii], 0); /************************************************ * full condensing ************************************************/ // condensed problem size int N2 = 1; int nx2[N2+1]; int nu2[N2+1]; int nb2[N2+1]; int ng2[N2+1]; d_cond_compute_problem_size_libstr(N, nx, nu, nb, hidxb, ng, nx2, nu2, nb2, ng2); #if 0 for(ii=0; ii<=N2; ii++) printf("\n%d %d %d %d\n", nx2[ii], nu2[ii], nb2[ii], ng2[ii]); #endif int work_sizes_cond[5]; int work_size_cond = d_cond_work_space_size_bytes_libstr(N, nx, nu, nb, hidxb, ng, nx2, nu2, nb2, ng2, work_sizes_cond); int memo_size_cond = d_cond_memory_space_size_bytes_libstr(N, nx, nu, nb, hidxb, ng, nx, nu2, nb2, ng2); int work_size_ipm_cond = d_ip2_res_mpc_hard_work_space_size_bytes_libstr(N2, nx2, nu2, nb2, ng2); int work_sizes_expa[2]; int work_size_expa = d_expand_work_space_size_bytes_libstr(N, nx, nu, nb, ng, work_sizes_expa); // work space void *work_cond; void *memo_cond; void *work_ipm_cond; void *work_expa; v_zeros_align(&work_cond, work_size_cond); v_zeros_align(&memo_cond, memo_size_cond); v_zeros_align(&work_ipm_cond, work_size_ipm_cond); v_zeros_align(&work_expa, work_size_expa); // data matrices struct blasfeo_dmat hsBAbt2[N2]; struct blasfeo_dvec hsb2[N2]; struct blasfeo_dmat hsRSQrq2[N2+1]; struct blasfeo_dvec hsrq2[N2+1]; struct blasfeo_dmat hsDCt2[N2+1]; struct blasfeo_dvec hsd2[N2+1]; int *hidxb2[N2+1]; for(ii=0; ii<N2; ii++) blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, nx2[ii+1], &hsBAbt2[ii]); for(ii=0; ii<N2; ii++) blasfeo_allocate_dvec(nx2[ii+1], &hsb2[ii]); for(ii=0; ii<=N2; ii++) blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, nu2[ii]+nx2[ii], &hsRSQrq2[ii]); for(ii=0; ii<=N2; ii++) blasfeo_allocate_dvec(nu2[ii]+nx2[ii], &hsrq2[ii]); for(ii=0; ii<=N2; ii++) blasfeo_allocate_dmat(nu2[ii]+nx2[ii]+1, ng2[ii], &hsDCt2[ii]); for(ii=0; ii<=N2; ii++) blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hsd2[ii]); for(ii=0; ii<=N2; ii++) int_zeros(&hidxb2[ii], nb2[ii], 1); // result vectors struct blasfeo_dvec hsux2[N2+1]; struct blasfeo_dvec hspi2[N2+1]; struct blasfeo_dvec hslam2[N2+1]; struct blasfeo_dvec hst2[N2+1]; for(ii=0; ii<=N2; ii++) { blasfeo_allocate_dvec(nu2[ii]+nx2[ii], &hsux2[ii]); blasfeo_allocate_dvec(nx2[ii], &hspi2[ii]); blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hslam2[ii]); blasfeo_allocate_dvec(2*nb2[ii]+2*ng2[ii], &hst2[ii]); } d_cond_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, memo_cond, work_cond, work_sizes_cond); #if 0 printf("\nBAbt2\n"); for(ii=0; ii<N2; ii++) d_print_strmat(nu2[ii]+nx2[ii]+1, nx2[ii+1], &hsBAbt2[ii], 0, 0); printf("\nRSQrq2\n"); for(ii=0; ii<=N2; ii++) d_print_strmat(nu2[ii]+nx2[ii]+1, nu2[ii]+nx2[ii], &hsRSQrq2[ii], 0, 0); printf("\nDCt2\n"); for(ii=0; ii<=N2; ii++) d_print_strmat(nu2[ii]+nx2[ii], ng2[ii], &hsDCt2[ii], 0, 0); printf("\nd2\n"); for(ii=0; ii<=N2; ii++) blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hsd2[ii], 0); #endif /************************************************ * solve condensed system using IPM ************************************************/ // zero solution for(ii=0; ii<=N; ii++) blasfeo_dvecse(nu[ii]+nx[ii], 0.0, &hsux[ii], 0); for(ii=0; ii<=N; ii++) blasfeo_dvecse(nx[ii], 0.0, &hspi[ii], 0); for(ii=0; ii<=N; ii++) blasfeo_dvecse(2*nb[ii]+2*ng[ii], 0.0, &hslam[ii], 0); for(ii=0; ii<=N; ii++) blasfeo_dvecse(2*nb[ii]+2*ng[ii], 0.0, &hst[ii], 0); printf("\nsolving... (condensed system)\n"); gettimeofday(&tv0, NULL); // stop for(rep=0; rep<nrep; rep++) { d_cond_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsRSQrq, hsDCt, hsd, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, memo_cond, work_cond, work_sizes_cond); hpmpc_status = d_ip2_res_mpc_hard_libstr(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N2, nx2, nu2, nb2, hidxb2, ng2, hsBAbt2, hsRSQrq2, hsDCt2, hsd2, hsux2, 1, hspi2, hslam2, hst2, work_ipm_cond); d_expand_solution_libstr(N, nx, nu, nb, hidxb, ng, hsBAbt, hsb, hsRSQrq, hsrq, hsDCt, hsux, hspi, hslam, hst, nx2, nu2, nb2, hidxb2, ng2, hsux2, hspi2, hslam2, hst2, work_expa, work_sizes_expa); } gettimeofday(&tv1, NULL); // stop printf("\n... done\n"); float time_ipm_cond = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); #if 0 printf("\nux2 =\n\n"); for(ii=0; ii<=N2; ii++) blasfeo_print_tran_dvec(nu2[ii]+nx2[ii], &hsux2[ii], 0); printf("\npi2 =\n\n"); for(ii=0; ii<=N2; ii++) blasfeo_print_tran_dvec(nx2[ii], &hspi2[ii], 0); printf("\nlam2 =\n\n"); for(ii=0; ii<=N2; ii++) blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hslam2[ii], 0); printf("\nt2 =\n\n"); for(ii=0; ii<=N2; ii++) blasfeo_print_tran_dvec(2*nb2[ii]+2*ng2[ii], &hst2[ii], 0); #endif printf("\nux =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(nu[ii]+nx[ii], &hsux[ii], 0); printf("\npi =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(nx[ii], &hspi[ii], 0); printf("\nlam =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hslam[ii], 0); printf("\nt =\n\n"); for(ii=0; ii<=N; ii++) blasfeo_print_tran_dvec(2*nb[ii]+2*ng[ii], &hst[ii], 0); /************************************************ * free memory full space ************************************************/ // TODO d_free(A); d_free(B); d_free(b); d_free(x0); d_free(R); d_free(S); d_free(Q); d_free(r); d_free(q); d_free(d0); int_free(idxb0); d_free(d1); int_free(idxb1); d_free(dN); int_free(idxbN); v_free_align(work_space_ipm); blasfeo_free_dvec(&sx0); blasfeo_free_dvec(&sb); blasfeo_free_dmat(&sA); blasfeo_free_dvec(&sb0); blasfeo_free_dmat(&sBAbt0); if(N>1) blasfeo_free_dmat(&sBAbt1); blasfeo_free_dvec(&sr); blasfeo_free_dmat(&sS); blasfeo_free_dvec(&sr0); blasfeo_free_dmat(&sRSQrq0); blasfeo_free_dvec(&srq0); if(N>1) blasfeo_free_dmat(&sRSQrq1); if(N>1) blasfeo_free_dvec(&srq1); blasfeo_free_dmat(&sRSQrqN); blasfeo_free_dvec(&srqN); blasfeo_free_dvec(&sd0); blasfeo_free_dvec(&sd1); blasfeo_free_dvec(&sdN); for(ii=0; ii<N; ii++) { blasfeo_free_dvec(&hsux[ii]); blasfeo_free_dvec(&hspi[ii]); blasfeo_free_dvec(&hslam[ii]); blasfeo_free_dvec(&hst[ii]); blasfeo_free_dvec(&hsrrq[ii]); blasfeo_free_dvec(&hsrb[ii]); blasfeo_free_dvec(&hsrd[ii]); blasfeo_free_dvec(&hsrm[ii]); } ii = N; blasfeo_free_dvec(&hsux[ii]); blasfeo_free_dvec(&hspi[ii]); blasfeo_free_dvec(&hslam[ii]); blasfeo_free_dvec(&hst[ii]); blasfeo_free_dvec(&hsrrq[ii]); blasfeo_free_dvec(&hsrd[ii]); blasfeo_free_dvec(&hsrm[ii]); v_free_align(work_space_res); /************************************************ * print timings ************************************************/ printf("\ntime ipm full (in sec): %e", time_ipm_full); printf("\ntime ipm cond (in sec): %e\n\n", time_ipm_cond); /************************************************ * return ************************************************/ return 0; }
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) /* printf("\nflush subnormals to zero\n");*/ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif int ii, jj, idx; int rep, nrep=NREP; int nx = NX; // number of states (it has to be even for the mass-spring system test problem) int nu = NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = NN; // horizon lenght // int nb = NB; // number of box constrained inputs and states int nh = nu;//nu+nx/2; // number of hard box constraints int ns = nx;//nx/2;//nx; // number of soft box constraints int nb = nh + ns; int nhu = nu<nh ? nu : nh ; printf(" Test problem: mass-spring system with %d masses and %d controls.\n", nx/2, nu); printf("\n"); printf(" MPC problem size: %d states, %d inputs, %d horizon length, %d two-sided box constraints on inputs and states, %d two-sided soft constraints on states.\n", nx, nu, N, nh, ns); printf("\n"); #if IP == 1 printf(" IP method parameters: primal-dual IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_d_ip_box.c to change them).\n", K_MAX, MU_TOL); #elif IP == 2 printf(" IP method parameters: predictor-corrector IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_d_ip_box.c to change them).\n", K_MAX, MU_TOL); #else printf(" Wrong value for IP solver choice: %d\n", IP); #endif int info = 0; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line const int nz = nx+nu+1; const int pnz = bs*((nz+bs-1)/bs); const int pnx = bs*((nx+bs-1)/bs); const int pnu = bs*((nu+bs-1)/bs); const int pnb = bs*((2*nb+bs-1)/bs); // packed number of box constraints const int cnz = ncl*((nx+nu+1+ncl-1)/ncl); const int cnx = ncl*((nx+ncl-1)/ncl); const int anz = nal*((nz+nal-1)/nal); const int anx = nal*((nx+nal-1)/nal); // const int pad = (ncl-nx%ncl)%ncl; // packing between BAbtL & P // const int cnl = cnz<cnx+ncl ? nx+pad+cnx+ncl : nx+pad+cnz; const int cnl = cnz<cnx+ncl ? cnx+ncl : cnz; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.0; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 3.5; x0[1] = 3.5; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nu, B, nx); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); /* packed */ /* double *BAb; d_zeros(&BAb, nx, nz);*/ /* dmcopy(nx, nu, B, nx, BAb, nx);*/ /* dmcopy(nx, nx, A, nx, BAb+nu*nx, nx);*/ /* dmcopy(nx, 1 , b, nx, BAb+(nu+nx)*nx, nx);*/ /* transposed */ /* double *BAbt; d_zeros_align(&BAbt, pnz, pnz);*/ /* for(ii=0; ii<nx; ii++)*/ /* for(jj=0; jj<nz; jj++)*/ /* {*/ /* BAbt[jj+pnz*ii] = BAb[ii+nx*jj];*/ /* }*/ /* packed into contiguous memory */ double *pBAbt; d_zeros_align(&pBAbt, pnz, cnx); /* d_cvt_mat2pmat(nz, nx, BAbt, pnz, 0, pBAbt, cnx);*/ /* d_cvt_tran_mat2pmat(nx, nz, BAb, nx, 0, pBAbt, cnx);*/ d_cvt_tran_mat2pmat(nx, nu, B, nx, 0, pBAbt, cnx); d_cvt_tran_mat2pmat(nx, nx, A, nx, nu, pBAbt+nu/bs*cnx*bs+nu%bs, cnx); for (jj = 0; jj<nx; jj++) pBAbt[(nx+nu)/bs*cnx*bs+(nx+nu)%bs+jj*bs] = b[jj]; /* d_print_pmat (nz, nx, bs, pBAbt, cnx);*/ /* exit(1);*/ /************************************************ * box constraints ************************************************/ double *db; d_zeros_align(&db, 2*nb, 1); jj=0; for( ; jj<2*nhu; jj++) db[jj] = - 0.5; // umin for( ; jj<2*nh; jj++) db[jj] = - 4.0; // xmin_hard for( ; jj<2*nb; jj++) db[jj] = - 1.0; // xmin_soft /************************************************ * cost function ************************************************/ double *Q; d_zeros(&Q, nz, nz); for(ii=0; ii<nu; ii++) Q[ii*(nz+1)] = 2.0; for(; ii<nz; ii++) Q[ii*(nz+1)] = 0.0; for(ii=0; ii<nu; ii++) Q[nx+nu+ii*nz] = 0.2; for(; ii<nz; ii++) Q[nx+nu+ii*nz] = 0.1; /* Q[(nx+nu)*(pnz+1)] = 1e35; // large enough (not needed any longer) */ /* packed into contiguous memory */ double *pQ; d_zeros_align(&pQ, pnz, cnz); d_cvt_mat2pmat(nz, nz, Q, nz, 0, pQ, cnz); // cost function of the soft constrained slack variables double *Z; d_zeros_align(&Z, pnb, 1); for(ii=0; ii<2*ns; ii++) Z[2*nh+ii] = 0.0; //for(ii=0; ii<nx; ii++) Z[2*nu+2*ii+0] = 100.0; double *z; d_zeros_align(&z, pnb, 1); for(ii=0; ii<2*ns; ii++) z[2*nh+ii] = 100.0; // maximum element in cost functions double mu0 = 1.0; for(ii=0; ii<nu+nx; ii++) for(jj=0; jj<nu+nx; jj++) mu0 = fmax(mu0, Q[jj+nz*ii]); for(ii=0; ii<2*ns; ii++) { mu0 = fmax(mu0, Z[2*nh+ii]); mu0 = fmax(mu0, z[2*nh+ii]); } //printf("\n mu0 = %f\n", mu0); /************************************************ * matrices series ************************************************/ double *hpQ[N+1]; double *hq[N+1]; double *hZ[N+1]; double *hz[N+1]; double *hux[N+1]; double *hpi[N+1]; double *hlam[N+1]; double *ht[N+1]; double *hpBAbt[N]; double *hdb[N+1]; double *hrb[N]; double *hrq[N+1]; double *hrd[N+1]; double *hrz[N+1]; for(jj=0; jj<N; jj++) { //d_zeros_align(&hpQ[jj], pnz, cnz); hpQ[jj] = pQ; } //d_zeros_align(&hpQ[N], pnz, pnz); hpQ[N] = pQ; for(jj=0; jj<N; jj++) { d_zeros_align(&hq[jj], anz, 1); hZ[jj] = Z; hz[jj] = z; d_zeros_align(&hux[jj], anz, 1); d_zeros_align(&hpi[jj], anx, 1); d_zeros_align(&hlam[jj],2*pnb, 1); // TODO pnb d_zeros_align(&ht[jj], 2*pnb, 1); // TODO pnb hpBAbt[jj] = pBAbt; hdb[jj] = db; d_zeros_align(&hrb[jj], anx, 1); d_zeros_align(&hrq[jj], anz, 1); d_zeros_align(&hrd[jj], pnb, 1); // TODO pnb d_zeros_align(&hrz[jj], pnb, 1); // TODO pnb } d_zeros_align(&hq[N], anz, 1); hZ[N] = Z; hz[N] = z; d_zeros_align(&hux[N], anz, 1); d_zeros_align(&hpi[N], anx, 1); d_zeros_align(&hlam[N], 2*pnb, 1); // TODO pnb d_zeros_align(&ht[N], 2*pnb, 1); // TODO pnb hdb[N] = db; d_zeros_align(&hrq[N], anz, 1); d_zeros_align(&hrd[N], pnb, 1); // TODO pnb d_zeros_align(&hrz[N], pnb, 1); // TODO pnb // starting guess for(jj=0; jj<nx; jj++) hux[0][nu+jj]=x0[jj]; /************************************************ * riccati-like iteration ************************************************/ // double *work; d_zeros_align(&work, (N+1)*(pnz*cnl + 5*anz + 10*pnb + 2*anx) + 3*anz, 1); // work space double *work; d_zeros_align(&work, (N+1)*(pnz*cnl + pnz + 5*anz + 10*pnb + 2*anx) + anz + pnz*cnx, 1); // work space /* for(jj=0; jj<( (N+1)*(pnz*cnl + 4*anz + 4*pnb + 2*anx) + 3*anz ); jj++) work[jj] = -1.0;*/ int kk = 0; // acutal number of iterations /* char prec = PREC; // double/single precision*/ /* double sp_thr = SP_THR; // threshold to switch between double and single precision*/ int k_max = K_MAX; // maximum number of iterations in the IP method double mu_tol = MU_TOL; // tolerance in the duality measure double alpha_min = ALPHA_MIN; // minimum accepted step length double sigma[] = {0.4, 0.3, 0.01}; // control primal-dual IP behaviour double *stat; d_zeros(&stat, 5, k_max); // stats from the IP routine int compute_mult = COMPUTE_MULT; int warm_start = WARM_START; double mu = -1.0; int hpmpc_status; /* initizile the cost function */ // for(ii=0; ii<N; ii++) // { // for(jj=0; jj<pnz*cnz; jj++) hpQ[ii][jj]=pQ[jj]; // } // for(jj=0; jj<pnz*cnz; jj++) hpQ[N][jj]=pQ[jj]; // initial states double xx0[] = {3.5, 3.5, 3.66465, 2.15833, 1.81327, -0.94207, 1.86531, -2.35760, 2.91534, 1.79890, -1.49600, -0.76600, -2.60268, 1.92456, 1.66630, -2.28522, 3.12038, 1.83830, 1.93519, -1.87113}; /* warm up */ // initialize states and inputs for(ii=0; ii<=N; ii++) for(jj=0; jj<nx+nu; jj++) hux[ii][jj] = 0; hux[0][nu+0] = xx0[0]; hux[0][nu+1] = xx0[1]; // call the IP solver // if(FREE_X0==0) // { if(IP==1) hpmpc_status = d_ip_soft_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nh, ns, hpBAbt, hpQ, hZ, hz, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_soft_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nh, ns, hpBAbt, hpQ, hZ, hz, hdb, hux, compute_mult, hpi, hlam, ht, work); // } // else // { // if(IP==1) // hpmpc_status = d_ip_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); // else // hpmpc_status = d_ip2_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); // } #if 0 if(PRINTSTAT==1) { printf("\n"); printf("\n"); printf(" Print IP statistics of the last run (soft-constraints solver)\n"); printf("\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); } if(PRINTRES==1) { printf("\n"); printf("\n"); printf(" Print solution\n"); printf("\n"); printf("\nu = \n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nu, hux[ii], 1); printf("\nx = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx, hux[ii]+nu, 1); printf("\nlam = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*nb, hlam[ii], 1); } #endif int kk_avg = 0; int kks_avg = 0; /* timing */ struct timeval tv0, tv1, tv2, tv3, tv4, tv5; // use general constraint to solve the soft-box-constrainted problem #if 1 int nus = nu + 2*nx; // number of inputs and slack variables int nbs = nus; int ngs = nx; const int nzs = nx+nus+1; const int cnzs = ncl*((nzs+ncl-1)/ncl); const int cngs = ncl*((ngs+ncl-1)/ncl); const int cnxgs= ncl*((ngs+nx+ncl-1)/ncl); const int pnzs = bs*((nzs+bs-1)/bs); const int pnbs = bs*((nbs+bs-1)/bs); // simd aligned number of one-sided box constraints !!!!!!!!!!!! const int pngs = bs*((ngs+bs-1)/bs); // simd aligned number of one-sided box constraints !!!!!!!!!!!! const int cnls = cnzs<cnx+ncl ? cnx+ncl : cnzs; const int anzs = nal*((nzs+nal-1)/nal); double *pBAbts; d_zeros_align(&pBAbts, pnzs, cnx); d_cvt_tran_mat2pmat(nx, nu, B, nx, 0, pBAbts, cnx); d_cvt_tran_mat2pmat(nx, nx, A, nx, nus, pBAbts+nus/bs*cnx*bs+nus%bs, cnx); for(jj=0; jj<nx; jj++) pBAbts[(nx+nus)/bs*cnx*bs+(nx+nus)%bs+jj*bs] = b[jj]; //d_print_pmat (nzs, nx, bs, pBAbts, cnx); double *ds; d_zeros_align(&ds, 2*pnbs+2*pngs, 1); for(jj=0; jj<nu; jj++) { ds[jj] = - 0.5; // umin ds[pnbs+jj] = - 0.5; // - umax } for(; jj<nus; jj++) { ds[jj] = 0.0; // smin ds[pnbs+jj] = - 10.0; // - smax } for(jj=0; jj<ngs; jj++) { ds[2*pnbs+jj] = - 1.0; // xmin ds[2*pnbs+pngs+jj] = - 1.0; // - xmax } //d_print_mat(1, 2*pnbs+2*pngs, ds, 1); double *Cs; d_zeros(&Cs, ngs, nx); double *Ds; d_zeros(&Ds, ngs, nus); for(jj=0; jj<nx; jj++) { Cs[jj+jj*ngs] = 1.0; Ds[jj+(nu+jj)*ngs] = 1.0; Ds[jj+(nu+nx+jj)*ngs] = - 1.0; } double *pDCts; d_zeros_align(&pDCts, pnzs, cngs); d_cvt_tran_mat2pmat(ngs, nus, Ds, ngs, 0, pDCts, cngs); d_cvt_tran_mat2pmat(ngs, nx, Cs, ngs, nus, pDCts+nus/bs*cngs*bs+nus%bs, cngs); //d_print_pmat(nus+nx, ngs, bs, pDCts, cngs); double *Qs; d_zeros(&Qs, nzs, nzs); d_copy_mat(nu, nu, Q, nz, Qs, nzs); d_copy_mat(nx+1, nu, Q+nu, nz, Qs+nus, nzs); d_copy_mat(nx+1, nx, Q+nu*(nz+1), nz, Qs+nus*(nzs+1), nzs); for(jj=0; jj<nx; jj++) { Qs[(nu+jj)*(nzs+1)] = Z[2*nh+2*jj+0]; // TODO change when updated IP !!!!! Qs[(nu+nx+jj)*(nzs+1)] = Z[2*nh+2*jj+1]; // TODO change when updated IP !!!!! Qs[nus+nx+(nu+jj)*nzs] = z[2*nh+2*jj+0]; // TODO change when updated IP !!!!! Qs[nus+nx+(nu+nx+jj)*nzs] = z[2*nh+2*jj+1]; // TODO change when updated IP !!!!! } double *pQs; d_zeros_align(&pQs, pnzs, cnzs); d_cvt_mat2pmat(nzs, nzs, Qs, nzs, 0, pQs, cnzs); //d_print_pmat(nzs, nzs, bs, pQs, cnzs); double *hpQs[N+1]; double *huxs[N+1]; double *hpis[N+1]; double *hlams[N+1]; double *hts[N+1]; double *hpBAbts[N]; double *hpDCts[N+1]; double *hds[N+1]; for(jj=0; jj<N; jj++) { hpQs[jj] = pQs; hpBAbts[jj] = pBAbts; hpDCts[jj] = pDCts; hds[jj] = ds; d_zeros_align(&huxs[jj], pnzs, 1); d_zeros_align(&hpis[jj], pnx, 1); d_zeros_align(&hlams[jj], 2*pnbs+2*pngs, 1); d_zeros_align(&hts[jj], 2*pnbs+2*pngs, 1); } hpQs[N] = pQs; d_zeros_align(&hpDCts[N], pnzs, cngs); d_zeros_align(&hds[N], 2*pnbs+2*pngs, 1); d_zeros_align(&huxs[N], pnzs, 1); d_zeros_align(&hpis[N], pnx, 1); d_zeros_align(&hlams[N] ,2*pnbs+2*pngs, 1); d_zeros_align(&hts[N], 2*pnbs+2*pngs, 1); double *works; d_zeros_align(&works, (N+1)*(pnzs*cnls + pnzs + 5*anzs + 10*(pnbs+pngs) + 2*anx) + anzs + pnzs*cnxgs, 1); // work space gettimeofday(&tv0, NULL); // start for(rep=0; rep<nrep; rep++) { // initialize states and inputs for(ii=0; ii<=N; ii++) for(jj=0; jj<nx+nus; jj++) huxs[ii][jj] = 0; idx = rep%10; huxs[0][nus+0] = xx0[2*idx]; huxs[0][nus+1] = xx0[2*idx+1]; if(IP==1) hpmpc_status = d_ip_hard_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nus, N, nbs, ngs, ngs, hpBAbts, hpQs, hpDCts, hds, huxs, compute_mult, hpis, hlams, hts, works); else hpmpc_status = d_ip2_hard_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nus, N, nbs, ngs, ngs, hpBAbts, hpQs, hpDCts, hds, huxs, compute_mult, hpis, hlams, hts, works); kks_avg += kk; } gettimeofday(&tv1, NULL); // stop if(PRINTSTAT==1) { printf("\n"); printf("\n"); printf(" Print IP statistics of the last run (general-constraints solver)\n"); printf("\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); } if(PRINTRES==1) { printf("\n"); printf("\n"); printf(" Print solution\n"); printf("\n"); printf("\nus = \n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nus, huxs[ii], 1); printf("\nxs = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx, huxs[ii]+nus, 1); } for(jj=0; jj<N; jj++) { free(huxs[jj]); free(hpis[jj]); free(hlams[jj]); free(hts[jj]); } free(hpDCts[N]); free(hds[N]); free(huxs[N]); free(hpis[N]); free(hlams[N]); free(hts[N]); free(works); //exit(1); #endif gettimeofday(&tv2, NULL); // start for(rep=0; rep<nrep; rep++) { idx = rep%10; // x0[0] = xx0[2*idx]; // x0[1] = xx0[2*idx+1]; // initialize states and inputs for(ii=0; ii<=N; ii++) for(jj=0; jj<nx+nu; jj++) hux[ii][jj] = 0; hux[0][nu+0] = xx0[2*idx]; hux[0][nu+1] = xx0[2*idx+1]; // call the IP solver // if(FREE_X0==0) // { if(IP==1) hpmpc_status = d_ip_soft_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nh, ns, hpBAbt, hpQ, hZ, hz, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_soft_mpc(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nh, ns, hpBAbt, hpQ, hZ, hz, hdb, hux, compute_mult, hpi, hlam, ht, work); // } // else // { // if(IP==1) // hpmpc_status = d_ip_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); // else // hpmpc_status = d_ip2_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); // } kk_avg += kk; } gettimeofday(&tv3, NULL); // stop // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+nz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+nz*jj]; // residuals computation // if(FREE_X0==0) d_res_ip_soft_mpc(nx, nu, N, nh, ns, hpBAbt, hpQ, hq, hZ, hz, hux, hdb, hpi, hlam, ht, hrq, hrb, hrd, hrz, &mu); // else // d_res_ip_box_mhe_old(nx, nu, N, nb, hpBAbt, hpQ, hq, hux, hdb, hpi, hlam, ht, hrq, hrb, hrd, &mu); if(PRINTSTAT==1) { printf("\n"); printf("\n"); printf(" Print IP statistics of the last run (soft-constraints solver)\n"); printf("\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); } if(PRINTRES==1) { printf("\n"); printf("\n"); printf(" Print solution\n"); printf("\n"); printf("\nu = \n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nu, hux[ii], 1); printf("\nx = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nx, hux[ii]+nu, 1); printf("\nlam = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*nb, hlam[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result // print result printf("\n"); printf("\n"); printf(" Print residuals\n\n"); printf("\n"); printf("\n"); printf("rq = \n\n"); // if(FREE_X0==0) // { d_print_mat(1, nu, hrq[0], 1); for(ii=1; ii<=N; ii++) /* d_print_mat_e(1, nx+nu, hrq[ii], 1);*/ d_print_mat(1, nx+nu, hrq[ii], 1); // } // else // { // for(ii=0; ii<=N; ii++) ///* d_print_mat_e(1, nx+nu, hrq[ii], 1);*/ // d_print_mat(1, nx+nu, hrq[ii], 1); // } printf("rz = \n\n"); for(ii=0; ii<=N; ii++) // d_print_mat_e(1, 2*nb-2*nu, hrz[ii]+2*nu, 1); d_print_mat(1, 2*nb-2*nu, hrz[ii]+2*nu, 1); printf("\n"); printf("\n"); printf("\n"); printf("\n"); printf("rb = \n\n"); for(ii=0; ii<N; ii++) /* d_print_mat_e(1, nx, hrb[ii], 1);*/ d_print_mat(1, nx, hrb[ii], 1); printf("\n"); printf("\n"); printf("rd = \n\n"); for(ii=0; ii<=N; ii++) /* d_print_mat_e(1, 2*nb, hrd[ii], 1);*/ d_print_mat(1, 2*nb, hrd[ii], 1); printf("\n"); printf("\n"); printf("mu = %e\n\n", mu); } /* printf("\nnx\tnu\tN\tkernel\n\n");*/ /* printf("\n%d\t%d\t%d\t%e\n\n", nx, nu, N, time);*/ /************************************************************************************************** * * time-variant nx and nu, sparse box and soft constraints format * **************************************************************************************************/ // problem size int nx_tv[N+1]; int nu_tv[N+1]; int nb_tv[N+1]; int ng_tv[N+1]; int ns_tv[N+1]; int nz_tv[N+1]; // vector of zeros // first stage nx_tv[0] = 0; nu_tv[0] = nu; nb_tv[0] = nu; ng_tv[0] = 0; ns_tv[0] = 0; nz_tv[0] = 0; // middle stages for(ii=1; ii<N; ii++) { nx_tv[ii] = nx; nu_tv[ii] = nu; nb_tv[ii] = nu; ng_tv[ii] = 0; ns_tv[ii] = nx; nz_tv[ii] = 0; } // last stage nx_tv[N] = nx; nu_tv[N] = 0; nb_tv[N] = 0; ng_tv[N] = 0; ns_tv[N] = nx; nz_tv[N] = 0; // matrix sizes int pnz_tv[N+1]; int pnx_tv[N+1]; int pnb_tv[N+1]; int png_tv[N+1]; int pns_tv[N+1]; int cnz_tv[N+1]; int cnx_tv[N+1]; int cnl_tv[N+1]; for(ii=0; ii<=N; ii++) { pnz_tv[ii] = (nu_tv[ii]+nx_tv[ii]+1+bs-1)/bs*bs; pnx_tv[ii] = (nx_tv[ii]+bs-1)/bs*bs; pnb_tv[ii] = (nb_tv[ii]+bs-1)/bs*bs; png_tv[ii] = (ng_tv[ii]+bs-1)/bs*bs; pns_tv[ii] = (ns_tv[ii]+bs-1)/bs*bs; cnz_tv[ii] = (nu_tv[ii]+nx_tv[ii]+1+ncl-1)/ncl*ncl; cnx_tv[ii] = (nx_tv[ii]+ncl-1)/ncl*ncl; cnl_tv[ii] = cnz_tv[ii]<cnx_tv[ii]+ncl ? cnx_tv[ii]+ncl : cnz_tv[ii]; } // for(ii=0; ii<=N; ii++) // printf("\n%d\t%d\t%d\t%d\t%d\t%d\t%d\n", pnz_tv[ii], pnx_tv[ii], pnb_tv[ii], pns_tv[ii], cnz_tv[ii], cnx_tv[ii], cnl_tv[ii]); // state-space matrices //d_print_mat(nx, nx, A, nx); //d_print_mat(nx, nu, B, nx); //for(ii=0; ii<nx; ii++) b[ii] = 1.0; //d_print_mat(nx, 1, b, nx); //d_print_mat(nx, 1, x0, nx); // compute b0 double *pA; d_zeros_align(&pA, pnx, cnx); d_cvt_mat2pmat(nx, nx, A, nx, 0, pA, cnx); double *b0; d_zeros_align(&b0, pnx, 1); dgemv_n_lib(nx, nx, pA, cnx, x0, 1, b, b0); //d_print_pmat(nx, nx, bs, pA, cnx); //d_print_mat(nx, 1, b0, nx); double *pBAbt0; d_zeros_align(&pBAbt0, pnz_tv[0], cnx_tv[1]); d_cvt_tran_mat2pmat(nx, nu, B, nx, 0, pBAbt0, cnx_tv[1]); d_cvt_tran_mat2pmat(nx, 1, b0, nx, nu, pBAbt0+nu/bs*bs*cnx_tv[1]+nu%bs, cnx_tv[1]); //d_print_pmat(nu_tv[0]+nx_tv[0]+1, nx_tv[1], bs, pBAbt0, cnx_tv[1]); double *pBAbt1; d_zeros_align(&pBAbt1, pnz_tv[1], cnx_tv[2]); d_cvt_tran_mat2pmat(nx, nu, B, nx, 0, pBAbt1, cnx_tv[2]); d_cvt_tran_mat2pmat(nx, nx, A, nx, nu, pBAbt1+nu/bs*bs*cnx_tv[2]+nu%bs, cnx_tv[2]); d_cvt_tran_mat2pmat(nx, 1, b, nx, nu+nx, pBAbt1+(nu+nx)/bs*bs*cnx_tv[2]+(nu+nx)%bs, cnx_tv[2]); // d_print_pmat(nu_tv[1]+nx_tv[1]+1, nx_tv[2], bs, pBAbt1, cnx_tv[2]); double *(hpBAbt_tv[N]); hpBAbt_tv[0] = pBAbt0; for(ii=1; ii<N; ii++) hpBAbt_tv[ii] = pBAbt1; // cost function matrices //for(ii=nu; ii<nu+nx; ii++) Q[ii*(nz+1)] = 1.0; // TODO remove !!!! //d_print_mat(nz, nz, Q, nz); double *q; d_zeros_align(&q, pnz, 1); for(ii=0; ii<nu; ii++) q[ii] = Q[nu+nx+ii*nz]; //d_print_mat(nu, 1, q, nu); double *pS; d_zeros_align(&pS, pnu, cnx); d_cvt_tran_mat2pmat(nx, nu, Q+nu, nz, 0, pS, cnx); //d_print_pmat(nu, nx, bs, pS, cnx); double *q0; d_zeros_align(&q0, pnz_tv[0], 1); dgemv_n_lib(nu, nx, pS, cnx, x0, 1, q, q0); //d_print_mat(nu, 1, q0, nu); double *pQ0; d_zeros_align(&pQ0, pnz_tv[0], cnz_tv[0]); d_cvt_mat2pmat(nu, nu, Q, nz, 0, pQ0, cnz_tv[0]); d_cvt_tran_mat2pmat(nu, 1, q0, nu, nu, pQ0+nu/bs*bs*cnz_tv[0]+nu%bs, cnz_tv[0]); //d_print_pmat(nu_tv[0]+nx_tv[0]+1, nu_tv[0]+nx_tv[0]+1, bs, pQ0, pnz_tv[0]); double *pQ1; d_zeros_align(&pQ1, pnz_tv[1], cnz_tv[1]); d_cvt_mat2pmat(nz, nz, Q, nz, 0, pQ1, cnz_tv[1]); //d_print_pmat(nu_tv[1]+nx_tv[1]+1, nu_tv[1]+nx_tv[1]+1, bs, pQ1, pnz_tv[1]); double *pQN; d_zeros_align(&pQN, pnz_tv[N], cnz_tv[N]); d_cvt_mat2pmat(nx+1, nx+1, Q+nu*(nz+1), nz, 0, pQN, cnz_tv[N]); //d_print_pmat(nu_tv[N]+nx_tv[N]+1, nu_tv[N]+nx_tv[N]+1, bs, pQN, cnz_tv[N]); double *(hpQ_tv[N+1]); hpQ_tv[0] = pQ0; for(ii=1; ii<N; ii++) hpQ_tv[ii] = pQ1; hpQ_tv[N] = pQN; double *(hpL_tv[N+1]); for(ii=0; ii<=N; ii++) d_zeros_align(&hpL_tv[ii], pnz_tv[ii], cnl_tv[ii]); double *(hdL_tv[N+1]); for(ii=0; ii<=N; ii++) d_zeros_align(&hdL_tv[ii], pnz_tv[ii], 1); double *hux_tv[N+1]; for(ii=0; ii<=N; ii++) d_zeros_align(&hux_tv[ii], (nu_tv[ii]+nx_tv[ii]+bs-1)/bs*bs, 1); double *hpi_tv[N+1]; for(ii=0; ii<=N; ii++) d_zeros_align(&hpi_tv[ii], pnx_tv[ii], 1); // dummy variables int **pdummyi; double **pdummyd; #if 0 // work space double *ric_tv_work; d_zeros_align(&ric_tv_work, d_ric_sv_mpc_tv_work_space_size_double(N, nx_tv, nu_tv, nz_tv, nz_tv), 1); double *ric_tv_diag; d_zeros_align(&ric_tv_diag, pnz, 1); // call the Riccati solver d_back_ric_sv_tv(N, nx_tv, nu_tv, hpBAbt_tv, hpQ_tv, hux_tv, hpL_tv, hdL_tv, ric_tv_work, ric_tv_diag, 0, pdummyd, 1, hpi_tv, nz_tv, pdummyi, pdummyd, pdummyd, nz_tv, pdummyd, pdummyd, pdummyd); // print solution for(ii=0; ii<=N; ii++) d_print_mat(1, nu_tv[ii]+nx_tv[ii], hux_tv[ii], 1); #endif // constraints int *idxb0 = (int *) malloc((nb_tv[0]+ns_tv[0])*sizeof(int)); double *db0; d_zeros_align(&db0, 2*pnb_tv[0]+2*pns_tv[0], 1); int nbu0; nbu0 = nu_tv[0]<nb_tv[0] ? nu_tv[0] : nb_tv[0]; idx = 0; for(jj=0; jj<nbu0; jj++) { idxb0[idx] = idx; db0[0*pnb_tv[0]+jj] = - 0.5; // umin_hard db0[1*pnb_tv[0]+jj] = - 0.5; // umax_hard idx++; } int *idxb1 = (int *) malloc((nb_tv[1]+ns_tv[1])*sizeof(int)); double *db1; d_zeros_align(&db1, 2*pnb_tv[1]+2*pns_tv[1], 1); nbu0 = nu_tv[1]<nb_tv[1] ? nu_tv[1] : nb_tv[1]; idx = 0; for(jj=0; jj<nbu0; jj++) { idxb1[idx] = idx; db1[0*pnb_tv[1]+jj] = - 0.5; // umin_hard db1[1*pnb_tv[1]+jj] = - 0.5; // umax_hard idx++; } for(jj=nu_tv[1]; jj<nb_tv[1]; jj++) { idxb1[idx] = idx; db1[0*pnb_tv[1]+jj] = - 4.0; // xmin_hard db1[1*pnb_tv[1]+jj] = - 4.0; // xmax_hard idx++; } for(jj=0; jj<ns_tv[1]; jj++) { idxb1[idx] = idx; db1[2*pnb_tv[1]+0*pns_tv[1]+jj] = - 1.0; // xmin_soft db1[2*pnb_tv[1]+1*pns_tv[1]+jj] = - 1.0; // xmax soft idx++; } int *idxbN = (int *) malloc((nb_tv[N]+ns_tv[N])*sizeof(int)); double *dbN; d_zeros_align(&dbN, 2*pnb_tv[N]+2*pns_tv[N], 1); idx = 0; for(jj=nu_tv[N]; jj<nb_tv[N]; jj++) { idxbN[idx] = idx; dbN[0*pnb_tv[N]+jj] = - 4.0; // xmin_hard dbN[1*pnb_tv[N]+jj] = - 4.0; // xmax_hard idx++; } for(jj=0; jj<ns_tv[N]; jj++) { idxbN[idx] = idx; dbN[2*pnb_tv[N]+0*pns_tv[N]+jj] = - 1.0; // xmin_soft dbN[2*pnb_tv[N]+1*pns_tv[N]+jj] = - 1.0; // xmax soft idx++; } int *idxb_tv[N+1]; double *hdb_tv[N+1]; idxb_tv[0] = idxb0; hdb_tv[0] = db0; for(ii=1; ii<N; ii++) { idxb_tv[ii] = idxb1; hdb_tv[ii] = db1; } idxb_tv[N] = idxbN; hdb_tv[N] = dbN; #if 0 for(ii=0; ii<=N; ii++) { for(jj=0; jj<nb_tv[ii]+ns_tv[ii]; jj++) printf("\t%d", idxb_tv[ii][jj]); printf("\n"); } #endif // cost function of the soft contraint slack variables double *Z1; d_zeros_align(&Z1, 2*pns_tv[1], 1); for(ii=0; ii<ns_tv[1]; ii++) { Z1[0*pns_tv[1]+ii] = 0.0; Z1[1*pns_tv[1]+ii] = 0.0; } double *z1; d_zeros_align(&z1, 2*pns_tv[1], 1); for(ii=0; ii<ns_tv[1]; ii++) { z1[0*pns_tv[1]+ii] = 100.0; z1[1*pns_tv[1]+ii] = 100.0; } double *hZ_tv[N+1]; double *hz_tv[N+1]; for(ii=0; ii<=N; ii++) { hZ_tv[ii] = Z1; hz_tv[ii] = z1; } // maximum element in cost functions mu0 = 1.0; for(ii=0; ii<nu+nx; ii++) for(jj=0; jj<nu+nx; jj++) mu0 = fmax(mu0, Q[jj+nz*ii]); for(ii=0; ii<ns; ii++) { mu0 = fmax(mu0, Z[0*pns_tv[1]+ii]); mu0 = fmax(mu0, Z[1*pns_tv[1]+ii]); mu0 = fmax(mu0, z[0*pns_tv[1]+ii]); mu0 = fmax(mu0, z[1*pns_tv[1]+ii]); } //printf("\n mu0 = %f\n", mu0); // lagrangian multipliers and slack variables double *hlam_tv[N+1]; double *ht_tv[N+1]; for(ii=0; ii<=N; ii++) { d_zeros_align(&hlam_tv[ii], 2*pnb_tv[ii]+2*png_tv[ii]+4*pns_tv[ii], 1); d_zeros_align(&ht_tv[ii], 2*pnb_tv[ii]+2*png_tv[ii]+4*pns_tv[ii], 1); } // ip soft work space double *ip_soft_tv_work; d_zeros_align(&ip_soft_tv_work, d_ip2_soft_mpc_tv_work_space_size_double(N, nx_tv, nu_tv, nb_tv, ng_tv, ns_tv), 1); // call the ip soft solver d_ip2_soft_mpc_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, N, nx_tv, nu_tv, nb_tv, idxb_tv, ng_tv, ns_tv, hpBAbt_tv, hpQ_tv, hZ_tv, hz_tv, pdummyd, hdb_tv, hux_tv, 1, hpi_tv, hlam_tv, ht_tv, ip_soft_tv_work); int kk_avg_tv = 0; gettimeofday(&tv4, NULL); // start for(rep=0; rep<nrep; rep++) { idx = rep%10; // x0[0] = xx0[2*idx]; // x0[1] = xx0[2*idx+1]; // initialize states and inputs // for(ii=0; ii<=N; ii++) // for(jj=0; jj<nx+nu; jj++) // hux[ii][jj] = 0; x0[0] = xx0[2*idx]; x0[1] = xx0[2*idx+1]; // update initial state embedded in b and r dgemv_n_lib(nx, nx, pA, cnx, x0, 1, b, b0); d_cvt_tran_mat2pmat(nx, 1, b0, nx, nu, pBAbt0+nu/bs*bs*cnx_tv[1]+nu%bs, cnx_tv[1]); dgemv_n_lib(nu, nx, pS, cnx, x0, 1, q, q0); d_cvt_tran_mat2pmat(nu, 1, q0, nu, nu, pQ0+nu/bs*bs*cnz_tv[0]+nu%bs, cnz_tv[0]); // call the IP solver d_ip2_soft_mpc_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, sigma, stat, N, nx_tv, nu_tv, nb_tv, idxb_tv, ng_tv, ns_tv, hpBAbt_tv, hpQ_tv, hZ_tv, hz_tv, pdummyd, hdb_tv, hux_tv, 1, hpi_tv, hlam_tv, ht_tv, ip_soft_tv_work); kk_avg_tv += kk; } gettimeofday(&tv5, NULL); // stop double *hrq_tv[N+1]; double *hrb_tv[N]; double *hrd_tv[N+1]; double *hrz_tv[N+1]; double *hq_tv[N+1]; for(ii=0; ii<N; ii++) { d_zeros_align(&hrq_tv[ii], pnz_tv[ii], 1); d_zeros_align(&hrb_tv[ii], pnx_tv[ii+1], 1); d_zeros_align(&hrd_tv[ii], 2*pnb_tv[ii]+2*png_tv[ii]+2*pns_tv[ii], 1); d_zeros_align(&hrz_tv[ii], 2*pns_tv[ii], 1); d_zeros_align(&hq_tv[ii], pnz_tv[ii], 1); } d_zeros_align(&hrq_tv[N], pnz_tv[N], 1); d_zeros_align(&hrd_tv[N], 2*pnb_tv[N]+2*png_tv[N]+2*pns_tv[N], 1); d_zeros_align(&hrz_tv[N], 2*pns_tv[N], 1); d_zeros_align(&hq_tv[N], pnz_tv[N], 1); // restore linear part of cost function for(ii=0; ii<=N; ii++) { drowex_lib(nu_tv[ii]+nx_tv[ii], hpQ_tv[ii]+(nu_tv[ii]+nx_tv[ii])/bs*bs*cnz_tv[ii]+(nu_tv[ii]+nx_tv[ii])%bs, hq_tv[ii]); } // residuals computation // d_res_ip_soft_mpc(nx, nu, N, nh, ns, hpBAbt, hpQ, hq, hZ, hz, hux, hdb, hpi, hlam, ht, hrq, hrb, hrd, hrz, &mu); d_res_ip_soft_mpc_tv(N, nx_tv, nu_tv, nb_tv, idxb_tv, ng_tv, ns_tv, hpBAbt_tv, hpQ_tv, hq_tv, hZ_tv, hz_tv, hux_tv, pdummyd, hdb_tv, hpi_tv, hlam_tv, ht_tv, hrq_tv, hrb_tv, hrd_tv, hrz_tv, &mu); if(PRINTSTAT==1) { printf("\n"); printf("\n"); printf(" Print IP statistics of the last run (soft-constraints time-variant solver)\n"); printf("\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); } if(PRINTRES==1) { printf("\n"); printf("\n"); printf(" Print solution\n"); printf("\n"); // print solution printf("\nhux_tv = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_tv[ii]+nx_tv[ii], hux_tv[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result // print result printf("\n"); printf("\n"); printf(" Print residuals\n\n"); printf("\n"); printf("\n"); printf("rq = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_tv[ii]+nx_tv[ii], hrq_tv[ii], 1); printf("\n"); printf("\n"); printf("rz = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pns_tv[ii], hrz_tv[ii], 1); printf("\n"); printf("\n"); printf("rb = \n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx_tv[ii], hrb_tv[ii], 1); printf("\n"); printf("\n"); printf("rd = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*pnb_tv[ii]+2*png_tv[ii]+2*pns_tv[ii], hrd_tv[ii], 1); printf("\n"); printf("\n"); printf("mu = %e\n\n", mu); } // free memory free(pA); free(b0); free(pBAbt0); free(pBAbt1); free(pQ0); free(pQ1); free(pQN); free(idxb0); free(idxb1); free(idxbN); free(db0); free(db1); free(dbN); free(Z1); free(z1); for(ii=0; ii<=N; ii++) free(hpL_tv[ii]); for(ii=0; ii<=N; ii++) free(hdL_tv[ii]); for(ii=0; ii<=N; ii++) free(hux_tv[ii]); for(ii=0; ii<=N; ii++) free(hpi_tv[ii]); for(ii=0; ii<=N; ii++) free(hlam_tv[ii]); for(ii=0; ii<=N; ii++) free(ht_tv[ii]); for(ii=0; ii<=N; ii++) free(hrq_tv[ii]); for(ii=0; ii<N; ii++) free(hrb_tv[ii]); for(ii=0; ii<=N; ii++) free(hrd_tv[ii]); for(ii=0; ii<=N; ii++) free(hrz_tv[ii]); for(ii=0; ii<=N; ii++) free(hq_tv[ii]); /************************************************************************************************** * printing timings **************************************************************************************************/ double times = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); double time = (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6); double time_tv = (tv5.tv_sec-tv4.tv_sec)/(nrep+0.0)+(tv5.tv_usec-tv4.tv_usec)/(nrep*1e6); /* printf("\nnx\tnu\tN\tkernel\n\n");*/ /* printf("\n%d\t%d\t%d\t%e\n\n", nx, nu, N, time);*/ printf("\n"); printf(" Average number of iterations over %d runs: %5.1f (soft-constraints solver)\n", nrep, kk_avg / (double) nrep); printf(" Average number of iterations over %d runs: %5.1f (general-constraints solver)\n", nrep, kks_avg / (double) nrep); printf(" Average number of iterations over %d runs: %5.1f (soft-constraints time-variant solver)\n", nrep, kk_avg_tv / (double) nrep); printf("\n"); printf(" Average solution time over %d runs: %5.2e seconds (soft-constraints solver)\n", nrep, time); printf(" Average solution time over %d runs: %5.2e seconds (general-constraints solver)\n", nrep, times); printf(" Average solution time over %d runs: %5.2e seconds (soft-constraints time-variant solver)\n", nrep, time_tv); printf("\n"); /************************************************ * free memory and return ************************************************/ free(A); free(B); free(b); free(x0); /* free(BAb);*/ /* free(BAbt);*/ free(pBAbt); free(db); free(Q); free(pQ); free(Z); free(z); free(work); free(stat); for(jj=0; jj<N; jj++) { // free(hpQ[jj]); free(hq[jj]); free(hux[jj]); free(hpi[jj]); free(hlam[jj]); free(ht[jj]); free(hrb[jj]); free(hrq[jj]); free(hrd[jj]); free(hrz[jj]); } // free(hpQ[N]); free(hq[N]); free(hux[N]); free(hpi[N]); free(hlam[N]); free(ht[N]); free(hrq[N]); free(hrd[N]); free(hrz[N]); return 0; }
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) /* printf("\nflush subnormals to zero\n");*/ _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif int ii, jj, idx; int rep, nrep=NREP; int nx = NX; // number of states (it has to be even for the mass-spring system test problem) int nu = NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = NN; // horizon lenght int nb = NB; // number of box constrained inputs and states printf(" Test problem: mass-spring system with %d masses and %d controls.\n", nx/2, nu); printf("\n"); printf(" MPC problem size: %d states, %d inputs, %d horizon length, %d two-sided box constraints.\n", nx, nu, N, nb); printf("\n"); #if IP == 1 printf(" IP method parameters: primal-dual IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_d_ip_box.c to change them).\n", K_MAX, MU_TOL); #elif IP == 2 printf(" IP method parameters: predictor-corrector IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_d_ip_box.c to change them).\n", K_MAX, MU_TOL); #else printf(" Wrong value for IP solver choice: %d\n", IP); #endif int info = 0; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; const int nal = bs*ncl; // number of doubles per cache line const int nz = nx+nu+1; const int pnz = bs*((nz+bs-1)/bs); const int pnx = bs*((nx+bs-1)/bs); const int cnz = ncl*((nx+nu+1+ncl-1)/ncl); const int cnx = ncl*((nx+ncl-1)/ncl); const int pnb = bs*((2*nb+bs-1)/bs); // packed number of box constraints const int anz = nal*((nz+nal-1)/nal); const int anx = nal*((nx+nal-1)/nal); const int anb = nal*((2*nb+nal-1)/nal); // cache aligned number of box constraints const int pad = (ncl-nx%ncl)%ncl; // packing between BAbtL & P const int cnl = cnz<cnx+ncl ? nx+pad+cnx+ncl : nx+pad+cnz; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros(&b, nx, 1); // states offset double *x0; d_zeros(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.1; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 3.5; x0[1] = 3.5; // d_print_mat(nx, nx, A, nx); // d_print_mat(nx, nu, B, nx); // d_print_mat(nx, 1, b, nx); // d_print_mat(nx, 1, x0, nx); /* packed */ /* double *BAb; d_zeros(&BAb, nx, nz);*/ /* dmcopy(nx, nu, B, nx, BAb, nx);*/ /* dmcopy(nx, nx, A, nx, BAb+nu*nx, nx);*/ /* dmcopy(nx, 1 , b, nx, BAb+(nu+nx)*nx, nx);*/ /* transposed */ /* double *BAbt; d_zeros_align(&BAbt, pnz, pnz);*/ /* for(ii=0; ii<nx; ii++)*/ /* for(jj=0; jj<nz; jj++)*/ /* {*/ /* BAbt[jj+pnz*ii] = BAb[ii+nx*jj];*/ /* }*/ /* packed into contiguous memory */ double *pBAbt; d_zeros_align(&pBAbt, pnz, cnx); /* d_cvt_mat2pmat(nz, nx, 0, bs, BAbt, pnz, pBAbt, cnx);*/ /* d_cvt_tran_mat2pmat(nx, nz, 0, bs, BAb, nx, pBAbt, cnx);*/ d_cvt_tran_mat2pmat(nx, nu, 0, bs, B, nx, pBAbt, cnx); d_cvt_tran_mat2pmat(nx, nx, nu, bs, A, nx, pBAbt+nu/bs*cnx*bs+nu%bs, cnx); for (jj = 0; jj<nx; jj++) pBAbt[(nx+nu)/bs*cnx*bs+(nx+nu)%bs+jj*bs] = b[jj]; /* d_print_pmat (nz, nx, bs, pBAbt, cnx);*/ /* exit(1);*/ /************************************************ * box constraints ************************************************/ double *db; d_zeros_align(&db, 2*nb, 1); for(jj=0; jj<2*nu; jj++) db[jj] = - 0.5; // umin for(; jj<2*nb; jj++) db[jj] = - 4.0; // xmin /************************************************ * cost function ************************************************/ double *Q; d_zeros_align(&Q, pnz, pnz); for(ii=0; ii<nu; ii++) Q[ii*(pnz+1)] = 2.0; for(; ii<pnz; ii++) Q[ii*(pnz+1)] = 1.0; for(ii=0; ii<nz; ii++) Q[nx+nu+ii*pnz] = 0.1; /* Q[(nx+nu)*(pnz+1)] = 1e35; // large enough (not needed any longer) */ /* packed into contiguous memory */ double *pQ; d_zeros_align(&pQ, pnz, cnz); d_cvt_mat2pmat(nz, nz, 0, bs, Q, pnz, pQ, cnz); /************************************************ * matrices series ************************************************/ double *(hpQ[N+1]); double *(hq[N+1]); double *(hux[N+1]); double *(hpi[N+1]); double *(hlam[N+1]); double *(ht[N+1]); double *(hpBAbt[N]); double *(hdb[N+1]); double *(hrb[N]); double *(hrq[N+1]); double *(hrd[N+1]); for(jj=0; jj<N; jj++) { d_zeros_align(&hpQ[jj], pnz, cnz); } d_zeros_align(&hpQ[N], pnz, pnz); for(jj=0; jj<N; jj++) { d_zeros_align(&hq[jj], anz, 1); d_zeros_align(&hux[jj], anz, 1); d_zeros_align(&hpi[jj], anx, 1); d_zeros_align(&hlam[jj],anb, 1); // TODO pnb d_zeros_align(&ht[jj], anb, 1); // TODO pnb hpBAbt[jj] = pBAbt; hdb[jj] = db; d_zeros_align(&hrb[jj], anx, 1); d_zeros_align(&hrq[jj], anz, 1); d_zeros_align(&hrd[jj], anb, 1); // TODO pnb } d_zeros_align(&hq[N], anz, 1); d_zeros_align(&hux[N], anz, 1); d_zeros_align(&hpi[N], anx, 1); d_zeros_align(&hlam[N], anb, 1); // TODO pnb d_zeros_align(&ht[N], anb, 1); // TODO pnb hdb[N] = db; d_zeros_align(&hrq[N], anz, 1); d_zeros_align(&hrd[N], anb, 1); // TODO pnb // starting guess for(jj=0; jj<nx; jj++) hux[0][nu+jj]=x0[jj]; /************************************************ * riccati-like iteration ************************************************/ double *work; d_zeros_align(&work, (N+1)*(pnz*cnl + 4*anz + 4*anb + 2*anx) + 3*anz, 1); // work space /* for(jj=0; jj<( (N+1)*(pnz*cnl + 4*anz + 4*anb + 2*anx) + 3*anz ); jj++) work[jj] = -1.0;*/ int kk = 0; // acutal number of iterations /* char prec = PREC; // double/single precision*/ /* double sp_thr = SP_THR; // threshold to switch between double and single precision*/ int k_max = K_MAX; // maximum number of iterations in the IP method double mu_tol = MU_TOL; // tolerance in the duality measure double alpha_min = ALPHA_MIN; // minimum accepted step length double sigma[] = {0.4, 0.3, 0.01}; // control primal-dual IP behaviour double *stat; d_zeros(&stat, 5, k_max); // stats from the IP routine int compute_mult = COMPUTE_MULT; int warm_start = WARM_START; double mu = -1.0; int hpmpc_status; /* initizile the cost function */ for(ii=0; ii<N; ii++) { for(jj=0; jj<pnz*cnz; jj++) hpQ[ii][jj]=pQ[jj]; } for(jj=0; jj<pnz*cnz; jj++) hpQ[N][jj]=pQ[jj]; // initial states double xx0[] = {3.5, 3.5, 3.66465, 2.15833, 1.81327, -0.94207, 1.86531, -2.35760, 2.91534, 1.79890, -1.49600, -0.76600, -2.60268, 1.92456, 1.66630, -2.28522, 3.12038, 1.83830, 1.93519, -1.87113}; /* warm up */ // initialize states and inputs for(ii=0; ii<=N; ii++) for(jj=0; jj<nx+nu; jj++) hux[ii][jj] = 0; hux[0][nu+0] = xx0[0]; hux[0][nu+1] = xx0[1]; // call the IP solver if(FREE_X0==0) { if(IP==1) hpmpc_status = d_ip_box_mpc(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_box_mpc(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); } else { if(IP==1) hpmpc_status = d_ip_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); } int kk_avg = 0; /* timing */ struct timeval tv0, tv1; gettimeofday(&tv0, NULL); // start for(rep=0; rep<nrep; rep++) { idx = rep%10; x0[0] = xx0[2*idx]; x0[1] = xx0[2*idx+1]; // initialize states and inputs for(ii=0; ii<=N; ii++) for(jj=0; jj<nx+nu; jj++) hux[ii][jj] = 0; hux[0][nu+0] = xx0[2*idx]; hux[0][nu+1] = xx0[2*idx+1]; // call the IP solver if(FREE_X0==0) { if(IP==1) hpmpc_status = d_ip_box_mpc(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_box_mpc(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); } else { if(IP==1) hpmpc_status = d_ip_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); else hpmpc_status = d_ip2_box_mhe_old(&kk, k_max, mu_tol, alpha_min, warm_start, sigma, stat, nx, nu, N, nb, hpBAbt, hpQ, hdb, hux, compute_mult, hpi, hlam, ht, work); } kk_avg += kk; } gettimeofday(&tv1, NULL); // stop double time = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); /* printf("\nnx\tnu\tN\tkernel\n\n");*/ /* printf("\n%d\t%d\t%d\t%e\n\n", nx, nu, N, time);*/ printf("\n"); printf(" Average number of iterations over %d runs: %5.1f\n", nrep, kk_avg / (double) nrep); printf("\n"); printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time); printf("\n"); // restore linear part of cost function for(ii=0; ii<N; ii++) { for(jj=0; jj<nx+nu; jj++) hq[ii][jj] = Q[nx+nu+pnz*jj]; } for(jj=0; jj<nx+nu; jj++) hq[N][jj] = Q[nx+nu+pnz*jj]; // residuals computation if(FREE_X0==0) d_res_ip_box_mpc(nx, nu, N, nb, hpBAbt, hpQ, hq, hux, hdb, hpi, hlam, ht, hrq, hrb, hrd, &mu); else d_res_ip_box_mhe_old(nx, nu, N, nb, hpBAbt, hpQ, hq, hux, hdb, hpi, hlam, ht, hrq, hrb, hrd, &mu); if(PRINTSTAT==1) { printf("\n"); printf("\n"); printf(" Print IP statistics of the last run\n"); printf("\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); } if(PRINTRES==1) { printf("\n"); printf("\n"); printf(" Print solution\n"); printf("\n"); printf("\nu = \n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nu, hux[ii], 1); printf("\nlam = \n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, 2*nb, hlam[ii], 1); } if(PRINTRES==1 && COMPUTE_MULT==1) { // print result // print result printf("\n"); printf("\n"); printf(" Print residuals\n\n"); printf("\n"); printf("\n"); printf("rq = \n\n"); if(FREE_X0==0) { d_print_mat(1, nu, hrq[0], 1); for(ii=1; ii<=N; ii++) /* d_print_mat_e(1, nx+nu, hrq[ii], 1);*/ d_print_mat(1, nx+nu, hrq[ii], 1); } else { for(ii=0; ii<=N; ii++) /* d_print_mat_e(1, nx+nu, hrq[ii], 1);*/ d_print_mat(1, nx+nu, hrq[ii], 1); } printf("\n"); printf("\n"); printf("rb = \n\n"); for(ii=0; ii<N; ii++) /* d_print_mat_e(1, nx, hrb[ii], 1);*/ d_print_mat(1, nx, hrb[ii], 1); printf("\n"); printf("\n"); printf("rd = \n\n"); for(ii=0; ii<=N; ii++) /* d_print_mat_e(1, 2*nb, hrd[ii], 1);*/ d_print_mat(1, 2*nb, hrd[ii], 1); printf("\n"); printf("\n"); printf("mu = %e\n\n", mu); } /* printf("\nnx\tnu\tN\tkernel\n\n");*/ /* printf("\n%d\t%d\t%d\t%e\n\n", nx, nu, N, time);*/ /************************************************ * free memory and return ************************************************/ free(A); free(B); free(b); free(x0); /* free(BAb);*/ /* free(BAbt);*/ free(pBAbt); free(db); free(Q); free(pQ); free(work); free(stat); for(jj=0; jj<N; jj++) { free(hpQ[jj]); free(hq[jj]); free(hux[jj]); free(hpi[jj]); free(hlam[jj]); free(ht[jj]); free(hrb[jj]); free(hrq[jj]); free(hrd[jj]); } free(hpQ[N]); free(hq[N]); free(hux[N]); free(hpi[N]); free(hlam[N]); free(ht[N]); free(hrq[N]); free(hrd[N]); return 0; }
int main() { int i, j, rep; const int bs = D_MR; //d_get_mr(); const int bss = S_MR; //s_get_mr(); printf("\nbs = %d\n\n", bss); int n = 16; int nrep = 1000000; double *A; d_zeros(&A, n, n); double *B; d_zeros(&B, n, n); double *C; d_zeros(&C, n, n); double *L; d_zeros(&L, n, n); float *sA; s_zeros(&sA, n, n); float *sB; s_zeros(&sB, n, n); for(i=0; i<n*n; i++) { A[i] = i; sA[i] = i; } B[0] = 2; /* B[1] = 1;*/ sB[0] = 2; /* sB[1] = 1;*/ for(i=1; i<n-1; i++) { /* B[i*(n+1)-1] = 1;*/ B[i*(n+1)+0] = 2; /* B[i*(n+1)+1] = 1;*/ /* sB[i*(n+1)-1] = 1;*/ sB[i*(n+1)+0] = 2; /* sB[i*(n+1)+1] = 1;*/ } /* B[n*n-2] = 1;*/ B[n*n-1] = 2; /* sB[n*n-2] = 1;*/ sB[n*n-1] = 2; for(i=0; i<n; i++) C[i*(n+1)] = 2; for(i=0; i<n-1; i++) C[1+i*(n+1)] = 1; /*sB[1*(n+1)] = 2;*/ /* d_print_mat(n, n, C, n);*/ int pn = ((n+bs-1)/bs)*bs;//+4; int pns = ((n+bss-1)/bss)*bss;//+4; int cns = ((n+S_NCL-1)/S_NCL)*S_NCL;//+4; int cns2 = ((2*n+S_NCL-1)/S_NCL)*S_NCL; double *pA; d_zeros_align(&pA, pn, pn); double *pB; d_zeros_align(&pB, pn, pn); double *pC; d_zeros_align(&pC, pn, pn); double *pL; d_zeros_align(&pL, pn, pn); float *spA; s_zeros_align(&spA, pns, cns); float *spB; s_zeros_align(&spB, pns, cns); float *spC; s_zeros_align(&spC, pns, cns); float *spD; s_zeros_align(&spD, pns, cns); float *spE; s_zeros_align(&spE, pns, cns2); float *diag; s_zeros_align(&diag, pns, 1); d_cvt_mat2pmat(n, n, 0, bs, A, n, pA, pn); d_cvt_mat2pmat(n, n, 0, bs, B, n, pB, pn); s_cvt_mat2pmat(n, n, 0, bss, sA, n, spA, cns); s_cvt_mat2pmat(n, n, 0, bss, sB, n, spB, cns); s_cvt_mat2pmat(n, n, 0, bss, sB, n, spC, cns); s_cvt_mat2pmat(n, n, 0, bss, sB, n, spD, cns); s_cvt_mat2pmat(n, n, 0, bss, sA, n, spE, cns2); double *x; d_zeros_align(&x, n, 1); double *y; d_zeros_align(&y, n, 1); x[2] = 1; /* for(i=0; i<pn*pn; i++) pC[i] = -1;*/ /* for(i=0; i<pn*pn; i++) spC[i] = -1;*/ // d_print_pmat(pn, pn, bs, pA, pn); // d_print_pmat(pn, pn, bs, pB, pn); // d_print_pmat(pn, pn, bs, pC, pn); // d_print_mat(n, n, B, n); // double *x; d_zeros_align(&x, pn, 1); // double *y; d_zeros_align(&y, pn, 1); // x[3] = 1.0; /* d_cvt_mat2pmat(n, n, bs-n%bs, bs, C, n, pC+((bs-n%bs))%bs*(bs+1), pn);*/ /* d_print_pmat(pn, pn, bs, pC, pn);*/ /* s_print_pmat(n, n, bss, spD, cns);*/ /* s_print_pmat(n, n+4, bss, spE, cns2);*/ /* timing */ struct timeval tv0, tv1; gettimeofday(&tv0, NULL); // start /* d_print_pmat(n, n, bs, pC, pn);*/ for(rep=0; rep<nrep; rep++) { /* sgemm_nt_lib(n, n, n, spA, cns, spB, cns, spC, cns, 0);*/ ssyrk_spotrf_lib(n, n, n, spE, cns2, spD, cns, diag); /* strtr_l_lib(11, 3, spA+3, cns, spC, cns);*/ /* sgemm_nt_lib(n, n, n, spB, pns, spA, pns, spC, pns, 0);*/ /* dgemm_nt_lib(n, n, n, pA, pn, pB, pn, pC, pn, 0);*/ /* dgemm_nt_lib(n, n, n, pB, pn, pA, pn, pC, pn, 0);*/ /* dtrmm_pup_nn_lib(n, n, pA, pn, B, n, pC, pn);*/ /* dsyrk_ppp_lib(n, n, pA, pn, pC, pn);*/ /* dgemm_ppp_nt_lib(n, n, n, pA, pn, pA, pn, pB, pn, 0);*/ /* dtrmm_ppp_lib(n, n, 0, pA, pn, pB, pn, pC, pn);*/ /* dpotrf_dcopy_lib(n, 0, pC, pn, pL, pn);*/ /* dgemm_pup_nn_lib(n, n, n, pA, pn, B, n, pC, pn, 0);*/ /* dgemm_ppp_nt_lib(n, n, n, pA, pn, pA, pn, pC+(bs-n)*(bs+1), pn, 1);*/ /* d_print_pmat(pn, pn, bs, pC, pn);*/ /* dpotrf_p_dcopy_u_lib(n, (bs-n%bs)%bs, pC+((bs-n%bs))%bs*(bs+1), pn, L, n);*/ /* d_print_pmat(pn, pn, bs, pC, pn);*/ /* d_print_mat(n, n, L, n);*/ /* exit(2);*/ // dgemm_nt_lib(n, n, n, A, n, B, n, C, n, 0); // dgemm_nt_lib_asm(n, n, n, pA, pn, pB, pn, pC, pn, 0); // sgemm_nt_lib_neon(n, n, n, spA, pns, spB, pns, spC, pns, 0); // dsymm_nt_lib(n, n, A, n, B, n, C, n); // dpotrf_lib(n, B, n); // dgemm_nt_lib2(n, pB, pA, pC, pn); // dgemv_n_lib(n-1, n, 1, pn, pA+1, x, y); // dtrmv_n_lib(n-1, 1, pA+1, pn, x, y); /* dtrmv_t_lib(n-1, 1, pA+1, pn, x, y);*/ } gettimeofday(&tv1, NULL); // stop float time = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); float flop = 2.0*n*n*n; /* float flop = 1.0*n*n;*/ // float flop = 1.0/3.0*n*n*n; float Gflops = 1e-9*flop/time; float Gflops_max = 1*1; printf("\nn\tGflops\t\t%%\n%d\t%f\t%f\n\n", n, Gflops, 100.0*Gflops/Gflops_max); if(n<=24) { // d_print_pmat(pn, pn, bs, pC, pn); // d_print_pmat(n, n, bs, pB, pn); /* d_print_pmat(n, n, bs, pA, pn);*/ /* d_print_mat(n, n, B, n);*/ /* d_print_pmat(n, n, bs, pB, pn);*/ /* d_print_pmat(n, n, bs, pC, pn);*/ /* d_print_pmat(n, n, bs, pL, pn);*/ s_print_pmat(n, n, bss, spA, cns); s_print_pmat(n, n, bss, spB, cns); /* s_print_pmat(n, n, bss, spC, cns);*/ s_print_pmat(n, n, bss, spE+n*bss, cns2); /* d_print_mat(n, 1, y, pn);*/ } return 0; }
int main() { #if defined(REF_BLAS_OPENBLAS) openblas_set_num_threads(1); #endif #if defined(REF_BLAS_BLIS) omp_set_num_threads(1); #endif printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); printf("BLAS performance test - double precision\n"); printf("\n"); // maximum frequency of the processor const float GHz_max = GHZ_MAX; printf("Frequency used to compute theoretical peak: %5.1f GHz (edit test_param.h to modify this value).\n", GHz_max); printf("\n"); // maximum flops per cycle, double precision #if defined(TARGET_X64_AVX2) const float flops_max = 16; printf("Testing BLAS version for AVX2 & FMA3 instruction sets, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_AVX) const float flops_max = 8; printf("Testing BLAS version for AVX instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) const float flops_max = 4; printf("Testing BLAS version for SSE3 instruction set, 64 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A15) const float flops_max = 2; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A15: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A9) const float flops_max = 1; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A9: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_CORTEX_A7) const float flops_max = 0.5; printf("Testing solvers for ARMv7a VFPv3 instruction set, oprimized for Cortex A7: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_X86_ATOM) const float flops_max = 1; printf("Testing BLAS version for SSE3 instruction set, 32 bit, optimized for Intel Atom: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_POWERPC_G2) const float flops_max = 1; printf("Testing BLAS version for POWERPC instruction set, 32 bit: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4) const float flops_max = 2; printf("Testing reference BLAS version, 4x4 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_4X4_PREFETCH) const float flops_max = 2; printf("Testing reference BLAS version, 4x4 kernel with register prefetch: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #elif defined(TARGET_C99_2X2) const float flops_max = 2; printf("Testing reference BLAS version, 2x2 kernel: theoretical peak %5.1f Gflops\n", flops_max*GHz_max); #endif FILE *f; f = fopen("./test_problems/results/test_blas.m", "w"); // a #if defined(TARGET_X64_AVX2) fprintf(f, "C = 'd_x64_avx2';\n"); fprintf(f, "\n"); #elif defined(TARGET_X64_AVX) fprintf(f, "C = 'd_x64_avx';\n"); fprintf(f, "\n"); #elif defined(TARGET_X64_SSE3) || defined(TARGET_AMD_SSE3) fprintf(f, "C = 'd_x64_sse3';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A9) fprintf(f, "C = 'd_ARM_cortex_A9';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A7) fprintf(f, "C = 'd_ARM_cortex_A7';\n"); fprintf(f, "\n"); #elif defined(TARGET_CORTEX_A15) fprintf(f, "C = 'd_ARM_cortex_A15';\n"); fprintf(f, "\n"); #elif defined(TARGET_X86_ATOM) fprintf(f, "C = 'd_x86_atom';\n"); fprintf(f, "\n"); #elif defined(TARGET_POWERPC_G2) fprintf(f, "C = 'd_PowerPC_G2';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_4X4) fprintf(f, "C = 'd_c99_4x4';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_4X4_PREFETCH) fprintf(f, "C = 'd_c99_4x4';\n"); fprintf(f, "\n"); #elif defined(TARGET_C99_2X2) fprintf(f, "C = 'd_c99_2x2';\n"); fprintf(f, "\n"); #endif fprintf(f, "A = [%f %f];\n", GHz_max, flops_max); fprintf(f, "\n"); fprintf(f, "B = [\n"); int i, j, rep, ll; const int bsd = D_MR; //d_get_mr(); /* int info = 0;*/ printf("\nn\t kernel_dgemm\t dgemm\t\t dsyrk_dpotrf\t dtrmm\t\t dtrtr\t\t dgemv_n\t dgemv_t\t dtrmv_n\t dtrmv_t\t dtrsv_n\t dtrsv_t\t dsymv\t\t dgemv_nt\t\t dsyrk+dpotrf\t BLAS dgemm\t BLAS dgemv_n\t BLAS dgemv_t\n"); printf("\nn\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\t Gflops\t %%\n\n"); #if 1 int nn[] = {4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 248, 252, 256, 260, 264, 268, 272, 276, 280, 284, 288, 292, 296, 300, 304, 308, 312, 316, 320, 324, 328, 332, 336, 340, 344, 348, 352, 356, 360, 364, 368, 372, 376, 380, 384, 388, 392, 396, 400, 404, 408, 412, 416, 420, 424, 428, 432, 436, 440, 444, 448, 452, 456, 460, 500, 550, 600, 650, 700}; int nnrep[] = {10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 400, 400, 400, 400, 400, 200, 200, 200, 200, 200, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 20, 20, 20, 20, 20, 20, 20, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 4, 4, 4, 4, 4}; for(ll=0; ll<75; ll++) // for(ll=0; ll<115; ll++) // for(ll=0; ll<120; ll++) { int n = nn[ll]; int nrep = nnrep[ll]; #else int nn[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24}; for(ll=0; ll<24; ll++) { int n = nn[ll]; int nrep = 40000; //nnrep[ll]; #endif #if defined(REF_BLAS_BLIS) f77_int n77 = n; #endif double *A; d_zeros(&A, n, n); double *B; d_zeros(&B, n, n); double *C; d_zeros(&C, n, n); double *M; d_zeros(&M, n, n); char c_n = 'n'; char c_t = 't'; int i_1 = 1; #if defined(REF_BLAS_BLIS) f77_int i77_1 = i_1; #endif double d_1 = 1; double d_0 = 0; for(i=0; i<n*n; i++) A[i] = i; for(i=0; i<n; i++) B[i*(n+1)] = 1; for(i=0; i<n*n; i++) M[i] = 1; int pnd = ((n+bsd-1)/bsd)*bsd; int cnd = ((n+D_NCL-1)/D_NCL)*D_NCL; int cnd2 = 2*((n+D_NCL-1)/D_NCL)*D_NCL; int pad = (D_NCL-n%D_NCL)%D_NCL; double *pA; d_zeros_align(&pA, pnd, cnd); double *pB; d_zeros_align(&pB, pnd, cnd); double *pC; d_zeros_align(&pC, pnd, cnd); double *pD; d_zeros_align(&pD, pnd, cnd); double *pE; d_zeros_align(&pE, pnd, cnd2); double *pF; d_zeros_align(&pF, 2*pnd, cnd); double *pL; d_zeros_align(&pL, pnd, cnd); double *pM; d_zeros_align(&pM, pnd, cnd); double *x; d_zeros_align(&x, pnd, 1); double *y; d_zeros_align(&y, pnd, 1); double *x2; d_zeros_align(&x2, pnd, 1); double *y2; d_zeros_align(&y2, pnd, 1); double *diag; d_zeros_align(&diag, pnd, 1); d_cvt_mat2pmat(n, n, A, n, 0, pA, cnd); d_cvt_mat2pmat(n, n, B, n, 0, pB, cnd); d_cvt_mat2pmat(n, n, B, n, 0, pD, cnd); d_cvt_mat2pmat(n, n, A, n, 0, pE, cnd2); d_cvt_mat2pmat(n, n, M, n, 0, pM, cnd); /* d_cvt_mat2pmat(n, n, B, n, 0, pE+n*bsd, pnd);*/ /* d_print_pmat(n, 2*n, bsd, pE, 2*pnd);*/ /* exit(2);*/ for(i=0; i<pnd*cnd; i++) pC[i] = -1; for(i=0; i<pnd; i++) x[i] = 1; for(i=0; i<pnd; i++) x2[i] = 1; double *dummy; /* timing */ struct timeval tvm1, tv0, tv1, tv2, tv3, tv4, tv5, tv6, tv7, tv8, tv9, tv10, tv11, tv12, tv13, tv14, tv15, tv16; /* warm up */ for(rep=0; rep<nrep; rep++) { dgemm_nt_lib(n, n, n, pA, cnd, pB, cnd, 1, pC, cnd, pC, cnd, 1, 1); } gettimeofday(&tvm1, NULL); // start for(rep=0; rep<nrep; rep++) { //dgemm_kernel_nt_lib(n, n, n, pA, cnd, pB, cnd, pC, cnd, pC, cnd, 0, 0, 0); dgemm_nn_lib(n, n, n, pA, cnd, pB, cnd, 0, pC, cnd, pC, cnd, 0, 0); } gettimeofday(&tv0, NULL); // start for(rep=0; rep<nrep; rep++) { dgemm_nt_lib(n, n, n, pA, cnd, pB, cnd, 0, pC, cnd, pC, cnd, 0, 0); } gettimeofday(&tv1, NULL); // stop for(rep=0; rep<nrep; rep++) { //dsyrk_dpotrf_lib(n, n, n, pA, cnd, 1, pD, cnd, pC, cnd, diag, 0); dsyrk_dpotrf_lib_new(n, n, n, pA, cnd, pA, cnd, 1, pD, cnd, pC, cnd, diag); } gettimeofday(&tv2, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrmm_nt_u_lib(n, n, pA, cnd, pB, cnd, pC, cnd); } gettimeofday(&tv3, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrtr_l_lib(n, 0, pA, cnd, pC, cnd); // triangualr matrix transpose //dgetr_lib(n, n, 0, pA, cnd, 0, pC, cnd); // general matrix transpose } gettimeofday(&tv4, NULL); // stop for(rep=0; rep<nrep; rep++) { dgemv_n_lib(n, n, pA, cnd, x, 0, y, y); } gettimeofday(&tv5, NULL); // stop for(rep=0; rep<nrep; rep++) { dgemv_t_lib(n, n, pA, cnd, x, 0, y, y); } gettimeofday(&tv6, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrmv_u_n_lib(n, pA, cnd, x, 0, y); } gettimeofday(&tv7, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrmv_u_t_lib(n, pA, cnd, x, 0, y); } gettimeofday(&tv8, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrsv_n_lib(2*n, n, 1, pF, cnd, x); } gettimeofday(&tv9, NULL); // stop for(rep=0; rep<nrep; rep++) { dtrsv_t_lib(2*n, n, 1, pF, cnd, x); } gettimeofday(&tv10, NULL); // stop for(rep=0; rep<nrep; rep++) { dsymv_lib(n, n, pA, cnd, x, 0, y, y); } gettimeofday(&tv11, NULL); // stop for(rep=0; rep<nrep; rep++) { dgemv_nt_lib(n, n, pA, cnd, x, x2, 0, y, y2, y, y2); } gettimeofday(&tv12, NULL); // stop for(rep=0; rep<nrep; rep++) { dsyrk_nt_lib(n, n, n, pE, cnd2, pE, cnd2, 1, pD, cnd, pE+(n+pad)*bsd, cnd2); //dpotrf_lib(n, n, pE+(n+pad)*bsd, cnd2, pE+(n+pad)*bsd, cnd2, diag); dpotrf_lib_new(n, n, pE+(n+pad)*bsd, cnd2, pE+(n+pad)*bsd, cnd2, diag); //d_print_pmat(pnd, cnd2, bsd, pE, cnd2); //exit(1); //break; } gettimeofday(&tv13, NULL); // stop for(rep=0; rep<nrep; rep++) { #if defined(REF_BLAS_OPENBLAS) || defined(REF_BLAS_NETLIB) dgemm_(&c_n, &c_n, &n, &n, &n, &d_1, A, &n, M, &n, &d_0, C, &n); #endif #if defined(REF_BLAS_BLIS) dgemm_(&c_n, &c_n, &n77, &n77, &n77, &d_1, A, &n77, B, &n77, &d_0, C, &n77); #endif } gettimeofday(&tv14, NULL); // stop for(rep=0; rep<nrep; rep++) { #if defined(REF_BLAS_OPENBLAS) || defined(REF_BLAS_NETLIB) dgemv_(&c_n, &n, &n, &d_1, A, &n, x2, &i_1, &d_0, y, &i_1); #endif #if defined(REF_BLAS_BLIS) dgemv_(&c_n, &n77, &n77, &d_1, A, &n77, x2, &i77_1, &d_0, y, &i77_1); #endif } gettimeofday(&tv15, NULL); // stop for(rep=0; rep<nrep; rep++) { #if defined(REF_BLAS_OPENBLAS) || defined(REF_BLAS_NETLIB) dgemv_(&c_t, &n, &n, &d_1, A, &n, x2, &i_1, &d_0, y, &i_1); #endif #if defined(REF_BLAS_BLIS) dgemv_(&c_t, &n77, &n77, &d_1, A, &n77, x2, &i77_1, &d_0, y, &i77_1); #endif } gettimeofday(&tv16, NULL); // stop float Gflops_max = flops_max * GHz_max; float time_dgemm_kernel = (float) (tv0.tv_sec-tvm1.tv_sec)/(nrep+0.0)+(tv0.tv_usec-tvm1.tv_usec)/(nrep*1e6); float flop_dgemm_kernel = 2.0*n*n*n; float Gflops_dgemm_kernel = 1e-9*flop_dgemm_kernel/time_dgemm_kernel; float time_dgemm = (float) (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); float flop_dgemm = 2.0*n*n*n; float Gflops_dgemm = 1e-9*flop_dgemm/time_dgemm; float time_dsyrk_dpotrf = (float) (tv2.tv_sec-tv1.tv_sec)/(nrep+0.0)+(tv2.tv_usec-tv1.tv_usec)/(nrep*1e6); float flop_dsyrk_dpotrf = 1.0*n*n*n + 1.0/3.0*n*n*n; float Gflops_dsyrk_dpotrf = 1e-9*flop_dsyrk_dpotrf/time_dsyrk_dpotrf; float time_dtrmm = (float) (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6); float flop_dtrmm = 1.0*n*n*n; float Gflops_dtrmm = 1e-9*flop_dtrmm/time_dtrmm; float time_dtrtr = (float) (tv4.tv_sec-tv3.tv_sec)/(nrep+0.0)+(tv4.tv_usec-tv3.tv_usec)/(nrep*1e6); float flop_dtrtr = 0.5*n*n; float Gflops_dtrtr = 1e-9*flop_dtrtr/time_dtrtr; float time_dgemv_n = (float) (tv5.tv_sec-tv4.tv_sec)/(nrep+0.0)+(tv5.tv_usec-tv4.tv_usec)/(nrep*1e6); float flop_dgemv_n = 2.0*n*n; float Gflops_dgemv_n = 1e-9*flop_dgemv_n/time_dgemv_n; float time_dgemv_t = (float) (tv6.tv_sec-tv5.tv_sec)/(nrep+0.0)+(tv6.tv_usec-tv5.tv_usec)/(nrep*1e6); float flop_dgemv_t = 2.0*n*n; float Gflops_dgemv_t = 1e-9*flop_dgemv_t/time_dgemv_t; float time_dtrmv_n = (float) (tv7.tv_sec-tv6.tv_sec)/(nrep+0.0)+(tv7.tv_usec-tv6.tv_usec)/(nrep*1e6); float flop_dtrmv_n = 1.0*n*n; float Gflops_dtrmv_n = 1e-9*flop_dtrmv_n/time_dtrmv_n; float time_dtrmv_t = (float) (tv8.tv_sec-tv7.tv_sec)/(nrep+0.0)+(tv8.tv_usec-tv7.tv_usec)/(nrep*1e6); float flop_dtrmv_t = 1.0*n*n; float Gflops_dtrmv_t = 1e-9*flop_dtrmv_t/time_dtrmv_t; float time_dtrsv_n = (float) (tv9.tv_sec-tv8.tv_sec)/(nrep+0.0)+(tv9.tv_usec-tv8.tv_usec)/(nrep*1e6); float flop_dtrsv_n = 3.0*n*n; float Gflops_dtrsv_n = 1e-9*flop_dtrsv_n/time_dtrsv_n; float time_dtrsv_t = (float) (tv10.tv_sec-tv9.tv_sec)/(nrep+0.0)+(tv10.tv_usec-tv9.tv_usec)/(nrep*1e6); float flop_dtrsv_t = 3.0*n*n; float Gflops_dtrsv_t = 1e-9*flop_dtrsv_t/time_dtrsv_t; float time_dsymv = (float) (tv11.tv_sec-tv10.tv_sec)/(nrep+0.0)+(tv11.tv_usec-tv10.tv_usec)/(nrep*1e6); float flop_dsymv = 2.0*n*n; float Gflops_dsymv = 1e-9*flop_dsymv/time_dsymv; float time_dgemv_nt = (float) (tv12.tv_sec-tv11.tv_sec)/(nrep+0.0)+(tv12.tv_usec-tv11.tv_usec)/(nrep*1e6); float flop_dgemv_nt = 4.0*n*n; float Gflops_dgemv_nt = 1e-9*flop_dgemv_nt/time_dgemv_nt; float time_dsyrk_dpotrf2 = (float) (tv13.tv_sec-tv12.tv_sec)/(nrep+0.0)+(tv13.tv_usec-tv12.tv_usec)/(nrep*1e6); float flop_dsyrk_dpotrf2 = 1.0*n*n*n + 1.0/3.0*n*n*n; float Gflops_dsyrk_dpotrf2 = 1e-9*flop_dsyrk_dpotrf2/time_dsyrk_dpotrf2; float time_dgemm_blas = (float) (tv14.tv_sec-tv13.tv_sec)/(nrep+0.0)+(tv14.tv_usec-tv13.tv_usec)/(nrep*1e6); float flop_dgemm_blas = 2.0*n*n*n; float Gflops_dgemm_blas = 1e-9*flop_dgemm_blas/time_dgemm_blas; float time_dgemv_n_blas = (float) (tv15.tv_sec-tv14.tv_sec)/(nrep+0.0)+(tv15.tv_usec-tv14.tv_usec)/(nrep*1e6); float flop_dgemv_n_blas = 2.0*n*n; float Gflops_dgemv_n_blas = 1e-9*flop_dgemv_n_blas/time_dgemv_n_blas; float time_dgemv_t_blas = (float) (tv16.tv_sec-tv15.tv_sec)/(nrep+0.0)+(tv16.tv_usec-tv15.tv_usec)/(nrep*1e6); float flop_dgemv_t_blas = 2.0*n*n; float Gflops_dgemv_t_blas = 1e-9*flop_dgemv_t_blas/time_dgemv_t_blas; printf("%d\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\n", n, Gflops_dgemm_kernel, 100.0*Gflops_dgemm_kernel/Gflops_max, Gflops_dgemm, 100.0*Gflops_dgemm/Gflops_max, Gflops_dsyrk_dpotrf, 100.0*Gflops_dsyrk_dpotrf/Gflops_max, Gflops_dtrmm, 100.0*Gflops_dtrmm/Gflops_max, Gflops_dtrtr, 100.0*Gflops_dtrtr/Gflops_max, Gflops_dgemv_n, 100.0*Gflops_dgemv_n/Gflops_max, Gflops_dgemv_t, 100.0*Gflops_dgemv_t/Gflops_max, Gflops_dtrmv_n, 100.0*Gflops_dtrmv_n/Gflops_max, Gflops_dtrmv_t, 100.0*Gflops_dtrmv_t/Gflops_max, Gflops_dtrsv_n, 100.0*Gflops_dtrsv_n/Gflops_max, Gflops_dtrsv_t, 100.0*Gflops_dtrsv_t/Gflops_max, Gflops_dsymv, 100.0*Gflops_dsymv/Gflops_max, Gflops_dgemv_nt, 100.0*Gflops_dgemv_nt/Gflops_max, Gflops_dsyrk_dpotrf2, 100.0*Gflops_dsyrk_dpotrf2/Gflops_max, Gflops_dgemm_blas, 100.0*Gflops_dgemm_blas/Gflops_max, Gflops_dgemv_n_blas, 100.0*Gflops_dgemv_n_blas/Gflops_max, Gflops_dgemv_t_blas, 100.0*Gflops_dgemv_t_blas/Gflops_max); fprintf(f, "%d\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\t%7.2f\n", n, Gflops_dgemm_kernel, 100.0*Gflops_dgemm_kernel/Gflops_max, Gflops_dgemm, 100.0*Gflops_dgemm/Gflops_max, Gflops_dsyrk_dpotrf, 100.0*Gflops_dsyrk_dpotrf/Gflops_max, Gflops_dtrmm, 100.0*Gflops_dtrmm/Gflops_max, Gflops_dtrtr, 100.0*Gflops_dtrtr/Gflops_max, Gflops_dgemv_n, 100.0*Gflops_dgemv_n/Gflops_max, Gflops_dgemv_t, 100.0*Gflops_dgemv_t/Gflops_max, Gflops_dtrmv_n, 100.0*Gflops_dtrmv_n/Gflops_max, Gflops_dtrmv_t, 100.0*Gflops_dtrmv_t/Gflops_max, Gflops_dtrsv_n, 100.0*Gflops_dtrsv_n/Gflops_max, Gflops_dtrsv_t, 100.0*Gflops_dtrsv_t/Gflops_max, Gflops_dsymv, 100.0*Gflops_dsymv/Gflops_max, Gflops_dgemv_nt, 100.0*Gflops_dgemv_nt/Gflops_max, Gflops_dsyrk_dpotrf2, 100.0*Gflops_dsyrk_dpotrf2/Gflops_max, Gflops_dgemm_blas, 100.0*Gflops_dgemm_blas/Gflops_max, Gflops_dgemv_n_blas, 100.0*Gflops_dgemv_n_blas/Gflops_max, Gflops_dgemv_t_blas, 100.0*Gflops_dgemv_t_blas/Gflops_max); free(A); free(B); free(M); free(pA); free(pB); free(pC); free(pD); free(pE); free(pF); free(pL); free(pM); free(x); free(y); free(x2); free(y2); } printf("\n"); fprintf(f, "];\n"); fclose(f); return 0; }
int main() { printf("\n"); printf("\n"); printf("\n"); printf(" HPMPC -- Library for High-Performance implementation of solvers for MPC.\n"); printf(" Copyright (C) 2014-2015 by Technical University of Denmark. All rights reserved.\n"); printf("\n"); printf(" HPMPC is distributed in the hope that it will be useful,\n"); printf(" but WITHOUT ANY WARRANTY; without even the implied warranty of\n"); printf(" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); printf(" See the GNU Lesser General Public License for more details.\n"); printf("\n"); printf("\n"); printf("\n"); #if defined(TARGET_X64_AVX2) || defined(TARGET_X64_AVX) || defined(TARGET_X64_SSE3) || defined(TARGET_X86_ATOM) || defined(TARGET_AMD_SSE3) _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); // flush to zero subnormals !!! works only with one thread !!! #endif int ii, jj; int rep, nrep=1000;//NREP; int nx = NX; // number of states (it has to be even for the mass-spring system test problem) int nu = NU; // number of inputs (controllers) (it has to be at least 1 and at most nx/2 for the mass-spring system test problem) int N = NN; // horizon lenght int nb = nu+nx; // number of box constrained inputs and states int ng = nx; //4; // number of general constraints int ngN = nx; // number of general constraints at the last stage # define USE_IPM_RES 1 // int M = 32; // where the equality constraint hold int nbu = nu<nb ? nu : nb ; int nbx = nb-nu>0 ? nb-nu : 0; #define KEEP_X0 0 // stage-wise variant size int nx_v[N+1]; #if KEEP_X0 nx_v[0] = nx; #else nx_v[0] = 0; #endif for(ii=1; ii<=N; ii++) nx_v[ii] = nx; int nu_v[N+1]; for(ii=0; ii<N; ii++) nu_v[ii] = nu; nu_v[N] = 0; int nb_v[N+1]; #if KEEP_X0 nb_v[0] = nb; #else nb_v[0] = nbu; #endif for(ii=1; ii<N; ii++) nb_v[ii] = nb; nb_v[N] = nbx; int ng_v[N+1]; for(ii=0; ii<N; ii++) ng_v[ii] = ng; ng_v[N] = ngN; // ng_v[M] = nx; // XXX printf(" Test problem: mass-spring system with %d masses and %d controls.\n", nx/2, nu); printf("\n"); printf(" MPC problem size: %d states, %d inputs, %d horizon length, %d two-sided box constraints, %d two-sided general constraints.\n", nx, nu, N, nb, ng); printf("\n"); #if IP == 1 printf(" IP method parameters: primal-dual IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_param.c to change them).\n", K_MAX, MU_TOL); #elif IP == 2 printf(" IP method parameters: predictor-corrector IP, double precision, %d maximum iterations, %5.1e exit tolerance in duality measure (edit file test_param.c to change them).\n", K_MAX, MU_TOL); #else printf(" Wrong value for IP solver choice: %d\n", IP); #endif int info = 0; const int bs = D_MR; //d_get_mr(); const int ncl = D_NCL; int pnz = (nu+nx+1+bs-1)/bs*bs; int pnu = (nu+bs-1)/bs*bs; int pnu1 = (nu+1+bs-1)/bs*bs; int pnx = (nx+bs-1)/bs*bs; int pnx1 = (nx+1+bs-1)/bs*bs; int pnux = (nu+nx+bs-1)/bs*bs; int cnx = (nx+ncl-1)/ncl*ncl; int cnu = (nu+ncl-1)/ncl*ncl; int cnux = (nu+nx+ncl-1)/ncl*ncl; int pnb_v[N+1]; int png_v[N+1]; int pnx_v[N+1]; int pnz_v[N+1]; int pnux_v[N+1]; int cnx_v[N+1]; int cnux_v[N+1]; int cng_v[N+1]; for(ii=0; ii<N; ii++) { pnb_v[ii] = (nb_v[ii]+bs-1)/bs*bs; png_v[ii] = (ng_v[ii]+bs-1)/bs*bs; pnx_v[ii] = (nx_v[ii]+bs-1)/bs*bs; pnz_v[ii] = (nu_v[ii]+nx_v[ii]+1+bs-1)/bs*bs; pnux_v[ii] = (nu_v[ii]+nx_v[ii]+bs-1)/bs*bs; cnx_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cnux_v[ii] = (nu_v[ii]+nx_v[ii]+ncl-1)/ncl*ncl; cng_v[ii] = (ng_v[ii]+ncl-1)/ncl*ncl; } ii = N; pnb_v[ii] = (nb_v[ii]+bs-1)/bs*bs; png_v[ii] = (ng_v[ii]+bs-1)/bs*bs; pnx_v[ii] = (nx_v[ii]+bs-1)/bs*bs; pnz_v[ii] = (nx_v[ii]+1+bs-1)/bs*bs; pnux_v[ii] = (nx_v[ii]+bs-1)/bs*bs; cnx_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cnux_v[ii] = (nx_v[ii]+ncl-1)/ncl*ncl; cng_v[ii] = (ng_v[ii]+ncl-1)/ncl*ncl; /************************************************ * dynamical system ************************************************/ double *A; d_zeros(&A, nx, nx); // states update matrix double *B; d_zeros(&B, nx, nu); // inputs matrix double *b; d_zeros_align(&b, nx, 1); // states offset double *x0; d_zeros_align(&x0, nx, 1); // initial state double Ts = 0.5; // sampling time mass_spring_system(Ts, nx, nu, N, A, B, b, x0); for(jj=0; jj<nx; jj++) b[jj] = 0.1; for(jj=0; jj<nx; jj++) x0[jj] = 0; x0[0] = 2.5; x0[1] = 2.5; double *pA; d_zeros_align(&pA, pnx, cnx); d_cvt_mat2pmat(nx, nx, A, nx, 0, pA, cnx); double *b0; d_zeros_align(&b0, pnx, 1); for(ii=0; ii<nx; ii++) b0[ii] = b[ii]; #if ! KEEP_X0 dgemv_n_lib(nx, nx, pA, cnx, x0, 1, b0, b0); #endif double *pBAbt0; d_zeros_align(&pBAbt0, pnz_v[0], cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], nu_v[0], B, nx_v[1], 0, pBAbt0, cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], nx_v[0], A, nx_v[1], nu_v[0], pBAbt0+nu_v[0]/bs*bs*cnx_v[1]+nu_v[0]%bs, cnx_v[1]); d_cvt_tran_mat2pmat(nx_v[1], 1, b0, nx_v[1], nu_v[0]+nx_v[0], pBAbt0+(nu_v[0]+nx_v[0])/bs*bs*cnx_v[1]+(nu_v[0]+nx_v[0])%bs, cnx_v[1]); double *pBAbt1; if(N>1) { d_zeros_align(&pBAbt1, pnz_v[1], cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], nu_v[1], B, nx_v[2], 0, pBAbt1, cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], nx_v[1], A, nx_v[2], nu_v[1], pBAbt1+nu_v[1]/bs*bs*cnx_v[2]+nu_v[1]%bs, cnx_v[2]); d_cvt_tran_mat2pmat(nx_v[2], 1, b, nx_v[2], nu_v[1]+nx_v[1], pBAbt1+(nu_v[1]+nx_v[1])/bs*bs*cnx_v[2]+(nu_v[1]+nx_v[1])%bs, cnx_v[2]); } #if 0 d_print_pmat(nu_v[0]+nx_v[0]+1, nx_v[1], bs, pBAbt0, cnx_v[1]); d_print_pmat(nu_v[1]+nx_v[1]+1, nx_v[2], bs, pBAbt1, cnx_v[2]); exit(2); #endif /************************************************ * box & general constraints ************************************************/ int *idx0; i_zeros(&idx0, nb_v[0], 1); double *d0; d_zeros_align(&d0, 2*pnb_v[0]+2*png_v[0], 1); #if KEEP_X0 for(jj=0; jj<nbu; jj++) { d0[jj] = - 0.5; // umin d0[pnb_v[0]+jj] = 0.5; // umax idx0[jj] = jj; } for(; jj<nb; jj++) { d0[jj] = x0[jj-nu]; // xmin d0[pnb_v[0]+jj] = x0[jj-nu]; // xmax idx0[jj] = jj; } #else for(jj=0; jj<nbu; jj++) { d0[jj] = - 0.5; // umin d0[pnb_v[0]+jj] = 0.5; // umax idx0[jj] = jj; } #endif for(jj=0; jj<ng_v[0]; jj++) { d0[2*pnb_v[0]+jj] = - 100.0; // xmin d0[2*pnb_v[0]+png_v[0]+jj] = 100.0; // xmax } #if 0 i_print_mat(1, nb_v[0], idx0, 1); d_print_mat(1, 2*pnb_v[0]+2*png_v[0], d0, 1); exit(2); #endif int *idx1; i_zeros(&idx1, nb_v[1], 1); double *d1; d_zeros_align(&d1, 2*pnb_v[1]+2*png_v[1], 1); for(jj=0; jj<nbu; jj++) { d1[jj] = - 0.5; // umin d1[pnb_v[1]+jj] = 0.5; // umax idx1[jj] = jj; } for(; jj<nb; jj++) { d1[jj] = - 10.0; // xmin d1[pnb_v[1]+jj] = 10.0; // xmax idx1[jj] = jj; } for(jj=0; jj<ng_v[1]; jj++) { d1[2*pnb_v[1]+jj] = - 100.0; // xmin d1[2*pnb_v[1]+png_v[1]+jj] = 100.0; // xmax } // i_print_mat(nb, 1, idx1, nb); int *idxN; i_zeros(&idxN, nb_v[N], 1); double *dN; d_zeros_align(&dN, 2*pnb_v[N]+2*png_v[N], 1); for(jj=0; jj<nbx; jj++) { dN[jj] = - 10.0; // xmin dN[pnb_v[N]+jj] = 10.0; // xmax idxN[jj] = jj; } for(jj=0; jj<ng_v[N]; jj++) { dN[2*pnb_v[N]+jj] = - 0.0; // xmin dN[2*pnb_v[N]+png_v[N]+jj] = 0.0; // xmax } // d_print_mat(1, 2*pnb+2*png, d, 1); // d_print_mat(1, 2*pnb_v[N]+2*png_v[N], dN, 1); // exit(1); // double *dM; d_zeros_align(&dM, 2*pnb_v[M]+2*png_v[M], 1); // for(jj=0; jj<nbu; jj++) // { // dM[jj] = - 0.5; // umin // dM[pnb_v[1]+jj] = 0.5; // umax // } // for(; jj<nb; jj++) // { // dM[jj] = - 4.0; // xmin // dM[pnb_v[1]+jj] = 4.0; // xmax // } // for(jj=0; jj<ng_v[M]; jj++) // { // dM[2*pnb_v[M]+jj] = - 0.5; // xmin // dM[2*pnb_v[M]+png_v[M]+jj] = - 0.5; // xmax // } double *C; d_zeros(&C, ng, nx); for(ii=0; ii<ng; ii++) C[ii*(ng+1)] = 1.0; double *D; d_zeros(&D, ng, nu); // first stage double *pDCt0; d_zeros_align(&pDCt0, pnux_v[0], cng_v[0]); // middle stage double *DC1; d_zeros(&DC1, ng_v[1], nu_v[1]+nx_v[1]); for(jj=0; jj<ng_v[1]; jj++) DC1[jj+(nu_v[1]+jj)*ng_v[1]] = 1.0; // d_print_mat(ng_v[1], nu_v[1]+nx_v[1], DC1, ng_v[1]); double *pDCt1; d_zeros_align(&pDCt1, pnux_v[1], cng_v[1]); d_cvt_tran_mat2pmat(ng_v[1], nu_v[1]+nx_v[1], DC1, ng_v[1], 0, pDCt1, cng_v[1]); // d_print_pmat(nu_v[1]+nx_v[1], ng_v[1], bs, pDCt1, cng_v[1]); // exit(2); // last stage double *DCN; d_zeros(&DCN, ng_v[N], nx_v[N]); for(jj=0; jj<ng_v[N]; jj++) DCN[jj*(ng_v[N]+1)] = 1.0; // d_print_mat(ng_v[N], nx_v[N], DCN, ng_v[N]); double *pDCtN; d_zeros_align(&pDCtN, pnx_v[N], cng_v[N]); d_cvt_tran_mat2pmat(ng_v[N], nx_v[N], DCN, ng_v[N], 0, pDCtN, cng_v[N]); // d_print_pmat(nx_v[N], ng_v[N], bs, pDCtN, cng_v[N]); // constrained stage // double *DCM; d_zeros(&DCM, ng_v[M], nu_v[M]+nx_v[M]); // for(jj=0; jj<ng_v[M]; jj++) DCM[jj+(jj+nu_v[M])*ng_v[M]] = 1.0; // d_print_mat(ng_v[M], nu_v[M]+nx_v[M], DCM, ng_v[M]); // double *pDCtM; d_zeros_align(&pDCtM, pnux_v[M], cng_v[M]); // d_cvt_tran_mat2pmat(ng_v[M], nu_v[M]+nx_v[M], DCM, ng_v[M], 0, pDCtM, cng_v[M]); // d_print_pmat(nu_v[M]+nx_v[M], ng_v[M], bs, pDCtM, cng_v[M]); // exit(2); /************************************************ * cost function ************************************************/ double *Q; d_zeros(&Q, nx, nx); for(ii=0; ii<nx; ii++) Q[ii*(nx+1)] = 1.0; double *R; d_zeros(&R, nu, nu); for(ii=0; ii<nu; ii++) R[ii*(nu+1)] = 2.0; double *S; d_zeros(&S, nu, nx); // S=0, so no need to update r0 double *q; d_zeros(&q, nx, 1); for(ii=0; ii<nx; ii++) q[ii] = 0.1; double *r; d_zeros(&r, nu, 1); for(ii=0; ii<nu; ii++) r[ii] = 0.2; #if KEEP_X0 double *pRSQ0; d_zeros_align(&pRSQ0, pnz, cnux); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ0, cnux); d_cvt_tran_mat2pmat(nu, nx, S, nu, nu, pRSQ0+nu/bs*bs*cnux+nu%bs, cnux); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu+nx, pRSQ0+(nu+nx)/bs*bs*cnux+(nu+nx)%bs, cnux); d_cvt_mat2pmat(nx, nx, Q, nx, nu, pRSQ0+nu/bs*bs*cnux+nu%bs+nu*bs, cnux); d_cvt_tran_mat2pmat(nx, 1, q, nx, nu+nx, pRSQ0+(nu+nx)/bs*bs*cnux+(nu+nx)%bs+nu*bs, cnux); // d_print_pmat(nu+nx+1, nu+nx, bs, pRSQ0, cnux); double *rq0; d_zeros_align(&rq0, pnux, 1); d_copy_mat(nu, 1, r, nu, rq0, pnux); d_copy_mat(nx, 1, q, nx, rq0+nu, pnux); #else double *pRSQ0; d_zeros_align(&pRSQ0, pnu1, cnu); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ0, cnu); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu, pRSQ0+nu/bs*bs*cnu+nu%bs, cnu); // d_print_pmat(nu+1, nu, bs, pRSQ0, cnu); double *rq0; d_zeros_align(&rq0, pnu, 1); d_copy_mat(nu, 1, r, nu, rq0, pnu); #endif double *pRSQ1; d_zeros_align(&pRSQ1, pnz, cnux); d_cvt_mat2pmat(nu, nu, R, nu, 0, pRSQ1, cnux); d_cvt_tran_mat2pmat(nu, nx, S, nu, nu, pRSQ1+nu/bs*bs*cnux+nu%bs, cnux); d_cvt_tran_mat2pmat(nu, 1, r, nu, nu+nx, pRSQ1+(nu+nx)/bs*bs*cnux+(nu+nx)%bs, cnux); d_cvt_mat2pmat(nx, nx, Q, nx, nu, pRSQ1+nu/bs*bs*cnux+nu%bs+nu*bs, cnux); d_cvt_tran_mat2pmat(nx, 1, q, nx, nu+nx, pRSQ1+(nu+nx)/bs*bs*cnux+(nu+nx)%bs+nu*bs, cnux); // d_print_pmat(nu+nx+1, nu+nx, bs, pRSQ1, cnux); double *rq1; d_zeros_align(&rq1, pnux, 1); d_copy_mat(nu, 1, r, nu, rq1, pnux); d_copy_mat(nx, 1, q, nx, rq1+nu, pnux); double *pRSQN; d_zeros_align(&pRSQN, pnx1, cnx); d_cvt_mat2pmat(nx, nx, Q, nx, 0, pRSQN, cnx); d_cvt_tran_mat2pmat(nx, 1, q, nx, nx, pRSQN+(nx)/bs*bs*cnx+(nx)%bs, cnx); // d_print_pmat(nx+1, nx, bs, pRSQN, cnx); double *rqN; d_zeros_align(&rqN, pnx, 1); d_copy_mat(nx, 1, q, nx, rqN, pnx); // maximum element in cost functions double mu0 = 2.0; /************************************************ * high level interface work space ************************************************/ #if 0 double *rA; d_zeros(&rA, nx, N*nx); d_rep_mat(N, nx, nx, A, nx, rA, nx); double *rB; d_zeros(&rB, nx, N*nu); d_rep_mat(N, nx, nu, B, nx, rB, nx); double *rC; d_zeros(&rC, ng, (N+1)*nx); d_rep_mat(N, ng, nx, C, ng, rC+nx*ng, ng); double *CN = DCN; double *rD; d_zeros(&rD, ng, N*nu); d_rep_mat(N, ng, nu, D, ng, rD, ng); double *rb; d_zeros(&rb, nx, N*1); d_rep_mat(N, nx, 1, b, nx, rb, nx); double *rQ; d_zeros(&rQ, nx, N*nx); d_rep_mat(N, nx, nx, Q, nx, rQ, nx); double *rQf; d_zeros(&rQf, nx, nx); d_copy_mat(nx, nx, Q, nx, rQf, nx); double *rS; d_zeros(&rS, nu, N*nx); d_rep_mat(N, nu, nx, S, nu, rS, nu); double *rR; d_zeros(&rR, nu, N*nu); d_rep_mat(N, nu, nu, R, nu, rR, nu); double *rq; d_zeros(&rq, nx, N); d_rep_mat(N, nx, 1, q, nx, rq, nx); double *rqf; d_zeros(&rqf, nx, 1); d_copy_mat(nx, 1, q, nx, rqf, nx); double *rr; d_zeros(&rr, nu, N); d_rep_mat(N, nu, 1, r, nu, rr, nu); double *lb; d_zeros(&lb, nb, 1); for(ii=0; ii<nb; ii++) lb[ii] = d1[ii]; double *rlb; d_zeros(&rlb, nb, N+1); d_rep_mat(N+1, nb, 1, lb, nb, rlb, nb); // d_print_mat(nb, N+1, rlb, nb); double *lg; d_zeros(&lg, ng, 1); for(ii=0; ii<ng; ii++) lg[ii] = d1[2*pnb_v[1]+ii]; double *rlg; d_zeros(&rlg, ng, N); d_rep_mat(N, ng, 1, lg, ng, rlg, ng); // d_print_mat(ng, N, rlg, ng); double *lgN; d_zeros(&lgN, ngN, 1); for(ii=0; ii<ngN; ii++) lgN[ii] = dN[2*pnb_v[N]+ii]; // d_print_mat(ngN, 1, lgN, ngN); double *ub; d_zeros(&ub, nb, 1); for(ii=0; ii<nb; ii++) ub[ii] = d1[pnb_v[1]+ii]; double *rub; d_zeros(&rub, nb, N+1); d_rep_mat(N+1, nb, 1, ub, nb, rub, nb); // d_print_mat(nb, N+1, rub, nb); double *ug; d_zeros(&ug, ng, 1); for(ii=0; ii<ng; ii++) ug[ii] = d1[2*pnb_v[1]+png_v[1]+ii]; double *rug; d_zeros(&rug, ng, N); d_rep_mat(N, ng, 1, ug, ng, rug, ng); // d_print_mat(ng, N, rug, ng); double *ugN; d_zeros(&ugN, ngN, 1); for(ii=0; ii<ngN; ii++) ugN[ii] = dN[2*pnb_v[N]+png_v[N]+ii]; // d_print_mat(ngN, 1, ugN, ngN); double *rx; d_zeros(&rx, nx, N+1); d_copy_mat(nx, 1, x0, nx, rx, nx); double *ru; d_zeros(&ru, nu, N); double *rpi; d_zeros(&rpi, nx, N); double *rlam; d_zeros(&rlam, N*2*(nb+ng)+2*(nb+ngN), 1); double *rt; d_zeros(&rt, N*2*(nb+ng)+2*(nb+ngN), 1); double *rwork = (double *) malloc(hpmpc_d_ip_mpc_hard_tv_work_space_size_bytes(N, nx, nu, nb, ng, ngN)); double inf_norm_res[4] = {}; // infinity norm of residuals: rq, rb, rd, mu #endif /************************************************ * low level interface work space ************************************************/ double *hpBAbt[N]; double *hpDCt[N+1]; double *hb[N]; double *hpRSQ[N+1]; double *hrq[N+1]; double *hd[N+1]; int *idx[N+1]; double *hux[N+1]; double *hpi[N]; double *hlam[N+1]; double *ht[N+1]; double *hrb[N]; double *hrrq[N+1]; double *hrd[N+1]; hpBAbt[0] = pBAbt0; hpDCt[0] = pDCt0; hb[0] = b0; hpRSQ[0] = pRSQ0; hrq[0] = rq0; hd[0] = d0; idx[0] = idx0; d_zeros_align(&hux[0], pnux_v[0], 1); d_zeros_align(&hpi[0], pnx_v[1], 1); d_zeros_align(&hlam[0], 2*pnb_v[0]+2*png_v[0], 1); d_zeros_align(&ht[0], 2*pnb_v[0]+2*png_v[0], 1); d_zeros_align(&hrb[0], pnx_v[1], 1); d_zeros_align(&hrrq[0], pnz_v[0], 1); d_zeros_align(&hrd[0], 2*pnb_v[0]+2*png_v[0], 1); for(ii=1; ii<N; ii++) { hpBAbt[ii] = pBAbt1; // d_zeros_align(&hpBAbt[ii], pnz_v[ii], cnx_v[ii+1]); for(jj=0; jj<pnz_v[ii]*cnx_v[ii+1]; jj++) hpBAbt[ii][jj] = pBAbt1[jj]; hpDCt[ii] = pDCt1; hb[ii] = b; hpRSQ[ii] = pRSQ1; // d_zeros_align(&hpRSQ[ii], pnz_v[ii], cnux_v[ii]); for(jj=0; jj<pnz_v[ii]*cnux_v[ii]; jj++) hpRSQ[ii][jj] = pRSQ1[jj]; hrq[ii] = rq1; hd[ii] = d1; idx[ii] = idx1; d_zeros_align(&hux[ii], pnux_v[ii], 1); d_zeros_align(&hpi[ii], pnx_v[ii+1], 1); d_zeros_align(&hlam[ii], 2*pnb_v[ii]+2*png_v[ii], 1); d_zeros_align(&ht[ii], 2*pnb_v[ii]+2*png_v[ii], 1); d_zeros_align(&hrb[ii], pnx_v[ii+1], 1); d_zeros_align(&hrrq[ii], pnz_v[ii], 1); d_zeros_align(&hrd[ii], 2*pnb_v[ii]+2*png_v[ii], 1); } hpDCt[N] = pDCtN; hpRSQ[N] = pRSQN; hrq[N] = rqN; hd[N] = dN; idx[N] = idxN; d_zeros_align(&hux[N], pnx, 1); d_zeros_align(&hlam[N], 2*pnb_v[N]+2*png_v[N], 1); d_zeros_align(&ht[N], 2*pnb_v[N]+2*png_v[N], 1); d_zeros_align(&hrrq[N], pnz_v[N], 1); d_zeros_align(&hrd[N], 2*pnb_v[N]+2*png_v[N], 1); // hpDCt[M] = pDCtM; // hd[M] = dM; double mu = 0.0; #if USE_IPM_RES double *work; d_zeros_align(&work, d_ip2_res_mpc_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)/sizeof(double), 1); #else double *work; d_zeros_align(&work, d_ip2_mpc_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)/sizeof(double), 1); #endif /************************************************ * (new) high level interface work space ************************************************/ // box constraints double *lb0; d_zeros(&lb0, nb_v[0], 1); for(ii=0; ii<nb_v[0]; ii++) lb0[ii] = d0[ii]; double *ub0; d_zeros(&ub0, nb_v[0], 1); for(ii=0; ii<nb_v[0]; ii++) ub0[ii] = d0[pnb_v[0]+ii]; double *lb1; d_zeros(&lb1, nb_v[1], 1); for(ii=0; ii<nb_v[1]; ii++) lb1[ii] = d1[ii]; double *ub1; d_zeros(&ub1, nb_v[1], 1); for(ii=0; ii<nb_v[1]; ii++) ub1[ii] = d1[pnb_v[1]+ii]; double *lbN; d_zeros(&lbN, nb_v[N], 1); for(ii=0; ii<nb_v[N]; ii++) lbN[ii] = dN[ii]; double *ubN; d_zeros(&ubN, nb_v[N], 1); for(ii=0; ii<nb_v[N]; ii++) ubN[ii] = dN[pnb_v[N]+ii]; // general constraints double *lg0; d_zeros(&lg0, ng_v[0], 1); for(ii=0; ii<ng_v[0]; ii++) lg0[ii] = d0[2*pnb_v[0]+ii]; double *ug0; d_zeros(&ug0, ng_v[0], 1); for(ii=0; ii<ng_v[0]; ii++) ug0[ii] = d0[2*pnb_v[0]+png_v[0]+ii]; double *lg1; d_zeros(&lg1, ng_v[1], 1); for(ii=0; ii<ng_v[1]; ii++) lg1[ii] = d1[2*pnb_v[1]+ii]; double *ug1; d_zeros(&ug1, ng_v[1], 1); for(ii=0; ii<ng_v[1]; ii++) ug1[ii] = d1[2*pnb_v[1]+png_v[1]+ii]; double *lgN; d_zeros(&lgN, ng_v[N], 1); for(ii=0; ii<ng_v[N]; ii++) lgN[ii] = dN[2*pnb_v[N]+ii]; double *ugN; d_zeros(&ugN, ng_v[N], 1); for(ii=0; ii<ng_v[N]; ii++) ugN[ii] = dN[2*pnb_v[N]+png_v[N]+ii]; // data matrices double *hA[N]; double *hB[N]; double *hC[N+1]; double *hD[N]; double *hQ[N+1]; double *hS[N]; double *hR[N]; double *hq[N+1]; double *hr[N]; double *hlb[N+1]; double *hub[N+1]; double *hlg[N+1]; double *hug[N+1]; double *hx[N+1]; double *hu[N]; double *hpi1[N]; double *hlam1[N+1]; double *ht1[N+1]; double inf_norm_res[4] = {}; // infinity norm of residuals: rq, rb, rd, mu ii = 0; hA[0] = A; hB[0] = B; hC[0] = C; hD[0] = D; hQ[0] = Q; hS[0] = S; hR[0] = R; hq[0] = q; hr[0] = r; hlb[0] = lb0; hub[0] = ub0; hlg[0] = lg0; hug[0] = ug0; d_zeros(&hx[0], nx_v[0], 1); d_zeros(&hu[0], nu_v[0], 1); d_zeros(&hpi1[0], nx_v[1], 1); d_zeros(&hlam1[0], 2*nb_v[0]+2*ng_v[0], 1); d_zeros(&ht1[0], 2*nb_v[0]+2*ng_v[0], 1); for(ii=1; ii<N; ii++) { hA[ii] = A; hB[ii] = B; hC[ii] = C; hD[ii] = D; hQ[ii] = Q; hS[ii] = S; hR[ii] = R; hq[ii] = q; hr[ii] = r; hlb[ii] = lb1; hub[ii] = ub1; hlg[ii] = lg1; hug[ii] = ug1; d_zeros(&hx[ii], nx_v[ii], 1); d_zeros(&hu[ii], nu_v[ii], 1); d_zeros(&hpi1[ii], nx_v[ii+1], 1); d_zeros(&hlam1[ii], 2*nb_v[ii]+2*ng_v[ii], 1); d_zeros(&ht1[ii], 2*nb_v[ii]+2*ng_v[ii], 1); } ii = N; hC[N] = C; hQ[N] = Q; hq[N] = q; hlb[N] = lbN; hub[N] = ubN; hlg[N] = lgN; hug[N] = ugN; d_zeros(&hx[N], nx_v[N], 1); d_zeros(&hlam1[N], 2*nb_v[N]+2*ng_v[N], 1); d_zeros(&ht1[N], 2*nb_v[N]+2*ng_v[N], 1); // work space #if 0 printf("work space in bytes: %d\n", hpmpc_d_ip_ocp_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)); exit(3); #endif void *work1 = malloc(hpmpc_d_ip_ocp_hard_tv_work_space_size_bytes(N, nx_v, nu_v, nb_v, ng_v)); double *ptr_work1 = (double *) work1; /************************************************ * solvers common stuff ************************************************/ int hpmpc_status; int kk, kk_avg; int k_max = 10; double mu_tol = 1e-20; double alpha_min = 1e-8; int warm_start = 0; // read initial guess from x and u double *stat; d_zeros(&stat, k_max, 5); int compute_res = 1; int compute_mult = 1; struct timeval tv0, tv1, tv2, tv3; double time; double **dummy; /************************************************ * call the solver (high-level interface) ************************************************/ #if 1 int time_invariant = 0; // assume the problem to be time invariant int free_x0 = 0; // assume x0 as optimization variable gettimeofday(&tv0, NULL); // stop kk_avg = 0; for(rep=0; rep<nrep; rep++) { // hpmpc_status = fortran_order_d_ip_mpc_hard_tv(&kk, k_max, mu0, mu_tol, N, nx, nu, nb, ng, ngN, time_invariant, free_x0, warm_start, rA, rB, rb, rQ, rQf, rS, rR, rq, rqf, rr, rlb, rub, rC, rD, rlg, rug, CN, lgN, ugN, rx, ru, rpi, rlam, rt, inf_norm_res, rwork, stat); hpmpc_status = fortran_order_d_ip_ocp_hard_tv(&kk, k_max, mu0, mu_tol, N, nx_v, nu_v, nb_v, ng_v, warm_start, hA, hB, hb, hQ, hS, hR, hq, hr, hlb, hub, hC, hD, hlg, hug, hx, hu, hpi1, hlam1, ht1, inf_norm_res, work1, stat); kk_avg += kk; } gettimeofday(&tv1, NULL); // stop printf("\nsolution from high-level interface\n\n"); // d_print_mat(nx, N+1, rx, nx); // d_print_mat(nu, N, ru, nu); for(ii=0; ii<=N; ii++) d_print_mat(1, nx_v[ii], hx[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nu_v[ii], hu[ii], 1); printf("\ninfinity norm of residuals\n\n"); d_print_mat_e(1, 4, inf_norm_res, 1); time = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); printf("\n"); printf(" Average number of iterations over %d runs: %5.1f\n", nrep, kk_avg / (double) nrep); printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time); printf("\n\n"); gettimeofday(&tv0, NULL); // stop kk_avg = 0; for(rep=0; rep<nrep; rep++) { // fortran_order_d_solve_kkt_new_rhs_mpc_hard_tv(N, nx, nu, nb, ng, ngN, time_invariant, free_x0, rA, rB, rb, rQ, rQf, rS, rR, rq, rqf, rr, rlb, rub, rC, rD, rlg, rug, CN, lgN, ugN, rx, ru, rpi, rlam, rt, inf_norm_res, rwork); fortran_order_d_solve_kkt_new_rhs_ocp_hard_tv(N, nx_v, nu_v, nb_v, ng_v, hA, hB, hb, hQ, hS, hR, hq, hr, hlb, hub, hC, hD, hlg, hug, hx, hu, hpi1, hlam1, ht1, inf_norm_res, work1); kk_avg += kk; } gettimeofday(&tv1, NULL); // stop printf("\nsolution from high-level interface (resolve final kkt)\n\n"); // d_print_mat(nx, N+1, rx, nx); // d_print_mat(nu, N, ru, nu); for(ii=0; ii<=N; ii++) d_print_mat(1, nx_v[ii], hx[ii], 1); for(ii=0; ii<N; ii++) d_print_mat(1, nu_v[ii], hu[ii], 1); printf("\ninfinity norm of residuals\n\n"); d_print_mat_e(1, 4, inf_norm_res, 1); time = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); printf(" Average solution time over %d runs: %5.2e seconds\n", nrep, time); #endif /************************************************ * call the solver (low-level interface) ************************************************/ // for(ii=0; ii<N; ii++) // d_print_pmat(nu_v[ii]+nx_v[ii]+1, nx_v[ii+1], bs, hpBAbt[ii], cnx_v[ii+1]); // exit(3); gettimeofday(&tv0, NULL); // stop kk_avg = 0; printf("\nsolution...\n"); for(rep=0; rep<nrep; rep++) { #if USE_IPM_RES hpmpc_status = d_ip2_res_mpc_hard_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hpRSQ, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #else hpmpc_status = d_ip2_mpc_hard_tv(&kk, k_max, mu0, mu_tol, alpha_min, warm_start, stat, N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hpRSQ, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #endif kk_avg += kk; } printf("\ndone\n"); gettimeofday(&tv1, NULL); // stop printf("\nsolution from low-level interface (original problem)\n\n"); printf("\nux\n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hux[ii], 1); printf("\npi\n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx_v[ii+1], hpi[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], hlam[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], ht[ii], 1); // residuals if(compute_res) { // compute residuals d_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hux, hpDCt, hd, hpi, hlam, ht, hrrq, hrb, hrd, &mu); // print residuals printf("\nhrrq\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nu_v[ii]+nx_v[ii], hrrq[ii], 1); printf("\nhrb\n\n"); for(ii=0; ii<N; ii++) d_print_mat_e(1, nx_v[ii+1], hrb[ii], 1); printf("\nhrd low\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii], 1); printf("\nhrd up\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii]+pnb_v[ii], 1); } // zero the solution again for(ii=0; ii<=N; ii++) for(jj=0; jj<nu_v[ii]+nx_v[ii]; jj++) hux[ii][jj] = 0.0; // modify constraints #if 0 for(jj=0; jj<nbx; jj++) { dN[jj] = - 4.0; // xmin dN[pnb_v[N]+jj] = 4.0; // xmax idxN[jj] = jj; } for(jj=0; jj<ng_v[N]; jj++) { dN[2*pnb_v[N]+jj] = 0.1; // xmin dN[2*pnb_v[N]+png_v[N]+jj] = 0.1; // xmax } #endif #if 0 for(ii=0; ii<=N; ii++) d_print_pmat(nu_v[ii]+nx_v[ii]+1, nu_v[ii]+nx_v[ii], bs, hpRSQ[ii], cnux_v[ii]); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hrq[ii], 1); exit(1); #endif gettimeofday(&tv2, NULL); // stop printf("\nsolution...\n"); for(rep=0; rep<nrep; rep++) { #if USE_IPM_RES d_kkt_solve_new_rhs_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #else d_kkt_solve_new_rhs_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hpDCt, hd, hux, compute_mult, hpi, hlam, ht, work); #endif } printf("\ndone\n"); gettimeofday(&tv3, NULL); // stop printf("\nsolution from low-level interface (resolve final kkt)\n\n"); printf("\nux\n\n"); for(ii=0; ii<=N; ii++) d_print_mat(1, nu_v[ii]+nx_v[ii], hux[ii], 1); printf("\npi\n\n"); for(ii=0; ii<N; ii++) d_print_mat(1, nx_v[ii+1], hpi[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], hlam[ii], 1); // printf("\nux\n\n"); // for(ii=0; ii<=N; ii++) // d_print_mat(1, 2*pnb_v[ii]+2*png_v[ii], ht[ii], 1); // residuals if(compute_res) { // compute residuals d_res_mpc_hard_tv(N, nx_v, nu_v, nb_v, idx, ng_v, hpBAbt, hb, hpRSQ, hrq, hux, hpDCt, hd, hpi, hlam, ht, hrrq, hrb, hrd, &mu); // print residuals printf("\nhrrq\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nu_v[ii]+nx_v[ii], hrrq[ii], 1); printf("\nhrb\n\n"); for(ii=0; ii<N; ii++) d_print_mat_e(1, nx_v[ii+1], hrb[ii], 1); printf("\nhrd low\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii], 1); printf("\nhrd up\n\n"); for(ii=0; ii<=N; ii++) d_print_mat_e(1, nb_v[ii], hrd[ii]+pnb_v[ii], 1); } double time_ipm = (tv1.tv_sec-tv0.tv_sec)/(nrep+0.0)+(tv1.tv_usec-tv0.tv_usec)/(nrep*1e6); double time_final = (tv3.tv_sec-tv2.tv_sec)/(nrep+0.0)+(tv3.tv_usec-tv2.tv_usec)/(nrep*1e6); printf("\nstatistics from last run\n\n"); for(jj=0; jj<kk; jj++) printf("k = %d\tsigma = %f\talpha = %f\tmu = %f\t\tmu = %e\talpha = %f\tmu = %f\tmu = %e\n", jj, stat[5*jj], stat[5*jj+1], stat[5*jj+2], stat[5*jj+2], stat[5*jj+3], stat[5*jj+4], stat[5*jj+4]); printf("\n"); printf("\n"); printf(" Average number of iterations over %d runs: %5.1f\n", nrep, kk_avg / (double) nrep); printf(" Average solution time over %d runs: %5.2e seconds (IPM)\n", nrep, time_ipm); printf(" Average solution time over %d runs: %5.2e seconds (resolve final kkt)\n", nrep, time_final); printf("\n\n"); /************************************************ * compute residuals ************************************************/ /************************************************ * free memory ************************************************/ // problem data free(A); free(B); d_free_align(b); d_free_align(x0); free(C); free(D); free(Q); free(S); free(R); free(q); free(r); // low level interface d_free_align(pA); d_free_align(b0); d_free_align(pBAbt0); d_free_align(pBAbt1); d_free_align(d0); d_free_align(d1); d_free_align(dN); d_free_align(pDCt0); d_free_align(pDCt1); free(DCN); d_free_align(pDCtN); free(idx0); free(idx1); free(idxN); d_free_align(pRSQ0); d_free_align(pRSQ1); d_free_align(pRSQN); d_free_align(rq0); d_free_align(rq1); d_free_align(rqN); d_free_align(work); free(stat); for(ii=0; ii<N; ii++) { d_free_align(hux[ii]); d_free_align(hpi[ii]); d_free_align(hlam[ii]); d_free_align(ht[ii]); d_free_align(hrb[ii]); d_free_align(hrrq[ii]); d_free_align(hrd[ii]); } d_free_align(hux[N]); d_free_align(hlam[N]); d_free_align(ht[N]); d_free_align(hrrq[N]); d_free_align(hrd[N]); #if 0 // high level interface free(rA); free(rB); free(rC); free(rD); free(rb); free(rQ); free(rQf); free(rS); free(rR); free(rq); free(rqf); free(rr); free(lb); free(rlb); free(lg); free(rlg); free(lgN); free(ub); free(rub); free(ug); free(rug); free(ugN); free(rx); free(ru); free(rpi); free(rlam); free(rt); free(rwork); #endif // new high level interface free(lb0); free(ub0); free(lb1); free(ub1); free(lbN); free(ubN); free(lg0); free(ug0); free(lg1); free(ug1); free(work1); for(ii=0; ii<N; ii++) { free(hx[ii]); free(hu[ii]); free(hpi1[ii]); free(hlam1[ii]); free(ht1[ii]); } free(hx[N]); free(hlam1[N]); free(ht1[N]); return 0; }