void Bchart:: extend_rule(Edge* edge, Item * item, int right) { Edge* newEdge = new Edge(*edge, *item, right); if(printDebug() > 140) cerr << "extend_rule " << *edge << " " << *item << endl; const Term* itemTerm = item->term(); LeftRightGotIter lrgi(newEdge); globalGi = &lrgi; if(edge->loc() == edge->start()) { newEdge->prob() *= meEdgeProb(item->term(), newEdge, MCALC); /*stoprightp is p of stopping after seeing what currently passes for the rhs of the edge */ newEdge->rightMerit() = computeMerit(newEdge,RUCALC); delete edge; // just created; } else if(right) { newEdge->prob() *= meEdgeProb(item->term(),newEdge, RCALC); } else newEdge->prob() *= meEdgeProb(item->term(),newEdge, LCALC); if(right) { newEdge->rightMerit() = computeMerit(newEdge,RMCALC); } else { /* this is the left boundary stat for constituents that are continuing left, given the label and whatever currently appears on the left boundary of the constit. we only need this when going left */ newEdge->leftMerit() = computeMerit(newEdge,LMCALC); } if(itemTerm == Term::stopTerm) newEdge->status() = right ? 2 : 1; if(newEdge->status() == 2) newEdge->prob() *= endFactorComp(newEdge); if(printDebug() > 250 ) cerr << "Constructed " << *newEdge << "\t" << newEdge->leftMerit() << "\t" << newEdge->prob() << "\t" << newEdge->rightMerit() << endl; int tmp = curDemerits_[newEdge->start()][newEdge->loc()]; newEdge->demerits() = tmp; if(repeatRule(newEdge)) { newEdge->rightMerit() = 0; } newEdge->setmerit(); //cerr << "DEM " << tmp << " " << newEdge->merit() << endl; globalGi = NULL; if(newEdge->merit() == 0) { assert(alreadyPopedNum < 450000); alreadyPoped[alreadyPopedNum++] = newEdge; Edge* prd = newEdge->pred(); if(prd) prd->sucs().pop_front(); return; } ++ruleiCounts_; heap->insert(newEdge); if(itemTerm != Term::stopTerm) item->needme().push_back(newEdge); }
bool minimizeMeritFunction(std::vector< Matrix<B_DIM> >& B, std::vector< Matrix<U_DIM> >& U, beliefPenaltyMPC_params &problem, beliefPenaltyMPC_output &output, beliefPenaltyMPC_info &info, double penalty_coeff) { LOG_DEBUG("Solving sqp problem with penalty parameter: %2.4f", penalty_coeff); //Matrix<B_DIM,1> b0 = B[0]; std::vector< Matrix<B_DIM,B_DIM> > F(T-1); std::vector< Matrix<B_DIM,U_DIM> > G(T-1); std::vector< Matrix<B_DIM> > h(T-1); double Beps = cfg::initial_trust_box_size; double Ueps = cfg::initial_trust_box_size; double Xpos_eps = cfg::initial_Xpos_trust_box_size; double Xangle_eps = cfg::initial_Xangle_trust_box_size; double Uvel_eps = cfg::initial_Uvel_trust_box_size; double Uangle_eps = cfg::initial_Uangle_trust_box_size; double optcost; std::vector<Matrix<B_DIM> > Bopt(T); std::vector<Matrix<U_DIM> > Uopt(T-1); double merit, model_merit, new_merit; double approx_merit_improve, exact_merit_improve, merit_improve_ratio; int sqp_iter = 1, index = 0; bool success; Matrix<B_DIM,B_DIM> IB = identity<B_DIM>(); Matrix<B_DIM,B_DIM> minusIB = IB; for(int i = 0; i < B_DIM; ++i) { minusIB(i,i) = -1; } //Matrix<3*B_DIM+U_DIM, 3*B_DIM+U_DIM> HMat; Matrix<B_DIM,3*B_DIM+U_DIM> CMat; Matrix<B_DIM> eVec; //Matrix<S_DIM,S_DIM> Hess; // sqp loop while(true) { // In this loop, we repeatedly construct a linear approximation to the nonlinear belief dynamics constraint LOG_DEBUG(" sqp iter: %d", sqp_iter); merit = computeMerit(B, U, penalty_coeff); LOG_DEBUG(" merit: %4.10f", merit); // Problem linearization and definition // fill in H, f, C, e for (int t = 0; t < T-1; ++t) { Matrix<B_DIM>& bt = B[t]; Matrix<U_DIM>& ut = U[t]; linearizeBeliefDynamics(bt, ut, F[t], G[t], h[t]); // initialize f in cost function to penalize // belief dynamics slack variables index = 0; for(int i = 0; i < (B_DIM+U_DIM); ++i) { f[t][index++] = 0; } for(int i = 0; i < 2*B_DIM; ++i) { f[t][index++] = penalty_coeff; } CMat.reset(); eVec.reset(); CMat.insert<B_DIM,B_DIM>(0,0,F[t]); CMat.insert<B_DIM,U_DIM>(0,B_DIM,G[t]); CMat.insert<B_DIM,B_DIM>(0,B_DIM+U_DIM,IB); CMat.insert<B_DIM,B_DIM>(0,2*B_DIM+U_DIM,minusIB); fillColMajor(C[t], CMat); if (t == 0) { eVec.insert<B_DIM,1>(0,0,B[0]); fillCol(e[0], eVec); } eVec = -h[t] + F[t]*bt + G[t]*ut; fillCol(e[t+1], eVec); } // trust region size adjustment while(true) { //std::cout << "PAUSED INSIDE MINIMIZEMERITFUNCTION" << std::endl; //std::cin.ignore(); LOG_DEBUG(" trust region size: %2.6f %2.6f", Beps, Ueps); // solve the innermost QP here for(int t = 0; t < T-1; ++t) { Matrix<B_DIM>& bt = B[t]; Matrix<U_DIM>& ut = U[t]; // Fill in lb, ub index = 0; // x lower bound //for(int i = 0; i < X_DIM; ++i) { lb[t][index++] = MAX(xMin[i], bt[i] - Beps); } // car pos lower bound for(int i = 0; i < P_DIM; ++i) { lb[t][index++] = MAX(xMin[i], bt[i] - Xpos_eps); } // car angle lower bound lb[t][index++] = MAX(xMin[P_DIM], bt[P_DIM] - Xangle_eps); // landmark pos lower bound for(int i = C_DIM; i < X_DIM; ++i) { lb[t][index++] = MAX(xMin[i], bt[i] - Xpos_eps); } // sigma lower bound for(int i = 0; i < S_DIM; ++i) { lb[t][index] = bt[index] - Beps; index++; } // u lower bound //for(int i = 0; i < U_DIM; ++i) { lb[t][index++] = MAX(uMin[i], ut[i] - Ueps); } // u velocity lower bound lb[t][index++] = MAX(uMin[0], ut[0] - Uvel_eps); // u angle lower bound lb[t][index++] = MAX(uMin[1], ut[1] - Uangle_eps); // for lower bound on L1 slacks for(int i = 0; i < 2*B_DIM; ++i) { lb[t][index++] = 0; } index = 0; // x upper bound //for(int i = 0; i < X_DIM; ++i) { ub[t][index++] = MIN(xMax[i], bt[i] + Beps); } // car pos upper bound for(int i = 0; i < P_DIM; ++i) { ub[t][index++] = MIN(xMax[i], bt[i] + Xpos_eps); } // car angle upper bound ub[t][index++] = MIN(xMax[P_DIM], bt[P_DIM] + Xangle_eps); // landmark pos upper bound for(int i = C_DIM; i < X_DIM; ++i) { ub[t][index++] = MIN(xMax[i], bt[i] + Xpos_eps); } // sigma upper bound for(int i = 0; i < S_DIM; ++i) { ub[t][index] = bt[index] + Beps; index++; } // u upper bound //for(int i = 0; i < U_DIM; ++i) { ub[t][index++] = MIN(uMax[i], ut[i] + Ueps); } // u velocity upper bound ub[t][index++] = MIN(uMax[0], ut[0] + Uvel_eps); // u angle upper bound ub[t][index++] = MIN(uMax[1], ut[1] + Uangle_eps); } Matrix<B_DIM>& bT = B[T-1]; // Fill in lb, ub, C, e index = 0; double finalPosDelta = .1; double finalAngleDelta = M_PI/4; // xGoal lower bound for(int i = 0; i < P_DIM; ++i) { lb[T-1][index++] = xGoal[i] - finalPosDelta; } // loose on car angles and landmarks lb[T-1][index++] = nearestAngleFromTo(bT[2], xGoal[2] - finalAngleDelta); //lb[T-1][index++] = xGoal[2] - finalAngleDelta; for(int i = C_DIM; i < X_DIM; ++i) { lb[T-1][index++] = MAX(xMin[i], bT[i] - Xpos_eps); } // sigma lower bound for(int i = 0; i < S_DIM; ++i) { lb[T-1][index] = bT[index] - Beps; index++;} index = 0; // xGoal upper bound for(int i = 0; i < P_DIM; ++i) { ub[T-1][index++] = xGoal[i] + finalPosDelta; } // loose on car angles and landmarks ub[T-1][index++] = nearestAngleFromTo(bT[2], xGoal[2] + finalAngleDelta); //ub[T-1][index++] = xGoal[2] + finalAngleDelta; for(int i = C_DIM; i < X_DIM; ++i) { ub[T-1][index++] = MIN(xMax[i], bT[i] + Xpos_eps); } // sigma upper bound for(int i = 0; i < S_DIM; ++i) { ub[T-1][index] = bT[index] + Beps; index++;} // Verify problem inputs //if (!isValidInputs()) { // std::cout << "Inputs are not valid!" << std::endl; // exit(-1); //} int exitflag = beliefPenaltyMPC_solve(&problem, &output, &info); if (exitflag == 1) { for(int t = 0; t < T-1; ++t) { Matrix<B_DIM>& bt = Bopt[t]; Matrix<U_DIM>& ut = Uopt[t]; for(int i = 0; i < B_DIM; ++i) { bt[i] = z[t][i]; } for(int i = 0; i < U_DIM; ++i) { ut[i] = z[t][B_DIM+i]; } optcost = info.pobj; } for(int i = 0; i < B_DIM; ++i) { Bopt[T-1][i] = z[T-1][i]; } } else { LOG_ERROR("Some problem in solver"); throw forces_exception(); } LOG_DEBUG("Optimized cost: %4.10f", optcost); model_merit = optcost; new_merit = computeMerit(Bopt, Uopt, penalty_coeff); LOG_DEBUG("merit: %4.10f", merit); LOG_DEBUG("model_merit: %4.10f", model_merit); LOG_DEBUG("new_merit: %4.10f", new_merit); approx_merit_improve = merit - model_merit; exact_merit_improve = merit - new_merit; merit_improve_ratio = exact_merit_improve / approx_merit_improve; LOG_DEBUG("approx_merit_improve: %1.6f", approx_merit_improve); LOG_DEBUG("exact_merit_improve: %1.6f", exact_merit_improve); LOG_DEBUG("merit_improve_ratio: %1.6f", merit_improve_ratio); //std::cout << "PAUSED INSIDE minimizeMeritFunction" << std::endl; //int num; //std::cin >> num; if (approx_merit_improve < -1e-5) { LOG_ERROR("Approximate merit function got worse: %1.6f", approx_merit_improve); //LOG_ERROR("Either convexification is wrong to zeroth order, or you are in numerical trouble"); //LOG_ERROR("Failure!"); return false; } else if (approx_merit_improve < cfg::min_approx_improve) { LOG_DEBUG("Converged: improvement small enough"); B = Bopt; U = Uopt; return true; } else if ((exact_merit_improve < 0) || (merit_improve_ratio < cfg::improve_ratio_threshold)) { Beps *= cfg::trust_shrink_ratio; Ueps *= cfg::trust_shrink_ratio; Xpos_eps *= cfg::trust_shrink_ratio; Xangle_eps *= cfg::trust_shrink_ratio; Uvel_eps *= cfg::trust_shrink_ratio; Uangle_eps *= cfg::trust_shrink_ratio; LOG_DEBUG("Shrinking trust region size to: %2.6f %2.6f %2.6f %2.6f", Xpos_eps, Xangle_eps, Uvel_eps, Uangle_eps); } else { Beps *= cfg::trust_expand_ratio; Ueps *= cfg::trust_expand_ratio; Xpos_eps *= cfg::trust_expand_ratio; Xangle_eps *= cfg::trust_expand_ratio; Uvel_eps *= cfg::trust_expand_ratio; Uangle_eps *= cfg::trust_expand_ratio; B = Bopt; U = Uopt; LOG_DEBUG("Accepted, Increasing trust region size to: %2.6f %2.6f", Beps, Ueps); break; } if (Beps < cfg::min_trust_box_size && Ueps < cfg::min_trust_box_size && Xpos_eps < cfg::min_trust_box_size && Xangle_eps < cfg::min_trust_box_size && Uvel_eps < cfg::min_trust_box_size && Uangle_eps < cfg::min_trust_box_size) { LOG_DEBUG("Converged: x tolerance"); return true; } } // trust region loop sqp_iter++; } // sqp loop return success; }
bool minimizeMeritFunction(std::vector< Matrix<X_DIM> >& X, std::vector< Matrix<U_DIM> >& U, statePenaltyMPC_params& problem, statePenaltyMPC_output& output, statePenaltyMPC_info& info, double penalty_coeff, double trust_box_size) { LOG_DEBUG("Solving sqp problem with penalty parameter: %2.4f", penalty_coeff); //std::cout << "Solving sqp problem with penalty parameter: " << penalty_coeff << std::endl; Matrix<X_DIM,1> x0 = X[0]; // constrain initial state for(int i = 0; i < X_DIM; ++i) { e[i] = x0[i]; } double Xeps = trust_box_size; double Ueps = trust_box_size; double prevcost, optcost; std::vector<Matrix<X_DIM> > Xopt(T); std::vector<Matrix<U_DIM> > Uopt(T-1); double merit, model_merit, new_merit; double approx_merit_improve, exact_merit_improve, merit_improve_ratio; double constant_cost, hessian_constant, jac_constant; int sqp_iter = 1, index = 0; bool success; Matrix<X_DIM+U_DIM, X_DIM+U_DIM> QMat; Matrix<X_DIM+2*G_DIM,X_DIM+2*G_DIM> QfMat; Matrix<X_DIM> eVec; Matrix<2*G_DIM,X_DIM+2*G_DIM> AMat; Matrix<2*G_DIM,1> bVec; Matrix<X_DIM+U_DIM> zbar; // full Hessian from current timstep Matrix<XU_DIM,XU_DIM> B = identity<XU_DIM>(); Matrix<XU_DIM> G, Gopt; double cost; int idx = 0; // sqp loop while(true) { // In this loop, we repeatedly construct a linear approximation to the nonlinear belief dynamics constraint LOG_DEBUG(" sqp iter: %d", sqp_iter); merit = computeMerit(X, U, penalty_coeff); LOG_DEBUG(" merit: %4.10f", merit); // Compute gradients #ifndef FINITE_DIFFERENCE casadiComputeCostGrad(X, U, cost, G); #else finiteDifferenceCostGrad(X, U, cost, G, B); #endif // Problem linearization and definition // fill in Q, f hessian_constant = 0; jac_constant = 0; idx = 0; for (int t = 0; t < T-1; ++t) { Matrix<X_DIM>& xt = X[t]; Matrix<U_DIM>& ut = U[t]; idx = t*(X_DIM+U_DIM); //LOG_DEBUG("idx: %d",idx); QMat.reset(); for(int i = 0; i < (X_DIM+U_DIM); ++i) { double val = B(idx+i,idx+i); QMat(i,i) = (val < 0) ? 0 : val; } for(int i=0; i < (X_DIM+U_DIM); ++i) { Q[t][i] = QMat(i,i); } //fillColMajor(Q[t], QMat); zbar.insert(0,0,xt); zbar.insert(X_DIM,0,ut); for(int i = 0; i < (X_DIM+U_DIM); ++i) { hessian_constant += QMat(i,i)*zbar[i]*zbar[i]; jac_constant -= G[idx+i]*zbar[i]; f[t][i] = G[idx+i] - QMat(i,i)*zbar[i]; } } // For last stage, fill in Q, f, A, b Matrix<X_DIM>& xT = X[T-1]; idx = (T-1)*(X_DIM+U_DIM); //LOG_DEBUG("idx: %d",idx); QfMat.reset(); for(int i = 0; i < X_DIM; ++i) { double val = B(idx+i,idx+i); QfMat(i,i) = (val < 0) ? 0 : val; } for(int i=0; i < (X_DIM+2*G_DIM); ++i) { Q[T-1][i] = QfMat(i,i); } //fillColMajor(Q[T-1], QfMat); for(int i = 0; i < X_DIM; ++i) { hessian_constant += QfMat(i,i)*xT[i]*xT[i]; jac_constant -= G[idx+i]*xT[i]; f[T-1][i] = G[idx+i] - QfMat(i,i)*xT[i]; } for(int i = 0; i < 2*G_DIM; ++i) { f[T-1][X_DIM+i] = penalty_coeff; } // fill in A and b Matrix<G_DIM,X_DIM> J; linearizeg(xT, J); AMat.reset(); AMat.insert<G_DIM,X_DIM>(0,0,J); AMat.insert<G_DIM,X_DIM>(G_DIM,0,-J); fillColMajor(A, AMat); Matrix<G_DIM> delta; delta [0] = delta[1] = delta[2] = goaldelta; bVec.insert<G_DIM,1>(0,0,posGoal - g(xT) + J*xT + delta); bVec.insert<G_DIM,1>(G_DIM,0,-posGoal + g(xT) - J*xT + delta); fillColMajor(b, bVec); constant_cost = 0.5*hessian_constant + jac_constant + cost; LOG_DEBUG(" hessian cost: %4.10f", 0.5*hessian_constant); LOG_DEBUG(" jacobian cost: %4.10f", jac_constant); LOG_DEBUG(" constant cost: %4.10f", constant_cost); //std::cout << "PAUSED INSIDE MINIMIZEMERITFUNCTION" << std::endl; //int k; //std::cin >> k; // trust region size adjustment while(true) { LOG_DEBUG(" trust region size: %2.6f %2.6f", Xeps, Ueps); //std::cout << " trust region size: " << Xeps << ", " << Ueps << std::endl; // solve the innermost QP here for(int t = 0; t < T-1; ++t) { Matrix<X_DIM>& xt = X[t]; Matrix<U_DIM>& ut = U[t]; // Fill in lb, ub index = 0; // x lower bound for(int i = 0; i < X_DIM; ++i) { lb[t][index++] = MAX(xMin[i], xt[i] - Xeps); } // u lower bound for(int i = 0; i < U_DIM; ++i) { lb[t][index++] = MAX(uMin[i], ut[i] - Ueps); } index = 0; // x upper bound for(int i = 0; i < X_DIM; ++i) { ub[t][index++] = MIN(xMax[i], xt[i] + Xeps); } // u upper bound for(int i = 0; i < U_DIM; ++i) { ub[t][index++] = MIN(uMax[i], ut[i] + Ueps); } } Matrix<X_DIM>& xT = X[T-1]; // Fill in lb, ub, C, e index = 0; // xGoal lower bound for(int i = 0; i < X_DIM; ++i) { lb[T-1][index++] = MAX(xMin[i], xT[i] - Xeps); } // for lower bound on L1 slacks for(int i = 0; i < 2*G_DIM; ++i) { lb[T-1][index++] = 0; } index = 0; // xGoal upper bound for(int i = 0; i < X_DIM; ++i) { ub[T-1][index++] = MIN(xMax[i], xT[i] + Xeps); } // Verify problem inputs // if (!isValidInputs()) { // std::cout << "Inputs are not valid!" << std::endl; // exit(-1); // } //std::cerr << "PAUSING INSIDE MINIMIZE MERIT FUNCTION FOR INPUT VERIFICATION" << std::endl; //int num; //std::cin >> num; int exitflag = statePenaltyMPC_solve(&problem, &output, &info); if (exitflag == 1) { for(int t = 0; t < T-1; ++t) { Matrix<X_DIM>& xt = Xopt[t]; Matrix<U_DIM>& ut = Uopt[t]; for(int i = 0; i < X_DIM; ++i) { xt[i] = z[t][i]; } for(int i = 0; i < U_DIM; ++i) { ut[i] = z[t][X_DIM+i]; } optcost = info.pobj; } for(int i = 0; i < X_DIM; ++i) { Xopt[T-1][i] = z[T-1][i]; } } else { LOG_ERROR("Some problem in solver"); exit(-1); } LOG_DEBUG(" Optimized cost: %4.10f", optcost); model_merit = optcost + constant_cost; new_merit = computeMerit(Xopt, Uopt, penalty_coeff); LOG_DEBUG(" merit: %4.10f", merit); LOG_DEBUG(" model_merit: %4.10f", model_merit); LOG_DEBUG(" new_merit: %4.10f", new_merit); approx_merit_improve = merit - model_merit; exact_merit_improve = merit - new_merit; merit_improve_ratio = exact_merit_improve / approx_merit_improve; LOG_DEBUG(" approx_merit_improve: %1.6f", approx_merit_improve); LOG_DEBUG(" exact_merit_improve: %1.6f", exact_merit_improve); LOG_DEBUG(" merit_improve_ratio: %1.6f", merit_improve_ratio); //std::cout << "PAUSED INSIDE minimizeMeritFunction AFTER OPTIMIZATION" << std::endl; //int num; //std::cin >> num; if (approx_merit_improve < -1e-5) { //LOG_ERROR("Approximate merit function got worse: %1.6f", approx_merit_improve); //LOG_ERROR("Either convexification is wrong to zeroth order, or you are in numerical trouble"); //LOG_ERROR("Failure!"); return false; } else if (approx_merit_improve < cfg::min_approx_improve) { LOG_DEBUG("Converged: improvement small enough"); X = Xopt; U = Uopt; return true; } else if ((exact_merit_improve < 0) || (merit_improve_ratio < cfg::improve_ratio_threshold)) { Xeps *= cfg::trust_shrink_ratio; Ueps *= cfg::trust_shrink_ratio; LOG_DEBUG("Shrinking trust region size to: %2.6f %2.6f", Xeps, Ueps); } else { Xeps *= cfg::trust_expand_ratio; Ueps *= cfg::trust_expand_ratio; #ifndef FINITE_DIFFERENCE casadiComputeCostGrad(Xopt, Uopt, cost, Gopt); Matrix<XU_DIM> s, y; idx = 0; for(int t = 0; t < T-1; ++t) { for(int i=0; i < X_DIM; ++i) { s[idx+i] = Xopt[t][i] - X[t][i]; y[idx+i] = Gopt[idx+i] - G[idx+i]; } idx += X_DIM; for(int i=0; i < U_DIM; ++i) { s[idx+i] = Uopt[t][i] - U[t][i]; y[idx+i] = Gopt[idx+i] - G[idx+i]; } idx += U_DIM; } for(int i=0; i < X_DIM; ++i) { s[idx+i] = Xopt[T-1][i] - X[T-1][i]; y[idx+i] = Gopt[idx+i] - G[idx+i]; } double theta; Matrix<XU_DIM> Bs = B*s; bool decision = ((~s*y)[0] >= .2*(~s*Bs)[0]); if (decision) { theta = 1; } else { theta = (.8*(~s*Bs)[0])/((~s*Bs-~s*y)[0]); } //std::cout << "theta: " << theta << std::endl; Matrix<XU_DIM> r = theta*y + (1-theta)*Bs; Matrix<XU_DIM> rBs = theta*(y -Bs); // SR1 update //B = B + (rBs*~rBs)/((~rBs*s)[0]); // L-BFGS update B = B - (Bs*~Bs)/((~s*Bs)[0]) + (r*~r)/((~s*r)[0]); // Do not update B //B = identity<XU_DIM>(); #endif X = Xopt; U = Uopt; LOG_DEBUG("Accepted, Increasing trust region size to: %2.6f %2.6f", Xeps, Ueps); break; } if (Xeps < cfg::min_trust_box_size && Ueps < cfg::min_trust_box_size) { LOG_DEBUG("Converged: x tolerance"); return true; } } // trust region loop sqp_iter++; } // sqp loop return success; }