/// Takes one gradient step from `lambda` with a backtracking search on `omega`.
///
/// The objective and gradient are evaluated once at `lambda`; the candidate
/// `y = lambda + gradient / omega` is then re-evaluated, and `omega` is
/// multiplied by `_lipschitz_inc_u` for as long as the candidate's objective
/// is below `obj_lambda + gradient_norm_squared / (2 * omega)`.
///
/// @param rho                    Forwarded unchanged to the objective routines.
/// @param lambda                 Point the step starts from.
/// @param obj_lambda             [out] Objective value at `lambda`.
/// @param gradient               [out] Gradient at `lambda`.
/// @param gradient_norm_squared  [out] `gradient.squaredNorm()`.
/// @param y                      [out] Accepted candidate point.
/// @param omega                  [in,out] Inverse step length; on return it is
///                               the accepted value divided by
///                               `_lipschitz_inc_d`, but never less than
///                               `_lipschitz_constant_optimistic`.
/// @return Objective value at the accepted `y`.
double SmoothDualDecompositionFistaDescent::gradientStep(double rho,
        const Eigen::VectorXd& lambda, double& obj_lambda,
        Eigen::VectorXd& gradient, double& gradient_norm_squared,
        Eigen::VectorXd& y, double& omega)
{
    // Objective, gradient and squared gradient norm at the starting point.
    obj_lambda = computeObjectiveAndGradient(rho, gradient, lambda);
    gradient_norm_squared = gradient.squaredNorm();

    double candidate_obj;
    for (;;) {
        const double step = 1.0/omega;
        y = lambda + gradient*step;
        candidate_obj = computeObjective(rho, y);

        const double required_obj = obj_lambda + 1/(2.0*omega)*gradient_norm_squared;
        // Written as !(a < b) so that a NaN objective ends the search,
        // exactly like the `a < b` loop condition it replaces.
        if (!(candidate_obj < required_obj)) {
            break;
        }

        // Candidate fell short of the required value: shorten the step.
        omega *= _lipschitz_inc_u;
    }

    // Relax omega again for the next call, bounded below by the optimistic value.
    omega = std::max(_lipschitz_constant_optimistic, omega/_lipschitz_inc_d);

    return candidate_obj;
}
// Run L-BFGS-B Solver on f(x) until completion SolverExitStatus Solver::runSolver() { SolverExitStatus status = success; // The return value. // This initial call sets up the structures for L-BFGS. callLBFGS("START"); // Repeat until we've reached the maximum number of iterations. while (true) { // Do something according to the "task" from the previous call to // L-BFGS. if (strIsEqualToCStr(task,"FG")) { // Evaluate the objective function and the gradient of the // objective at the current point x. *f = computeObjective(x); computeGradient(x); // printf("g = %f\n", g[0]); } else if (strIsEqualToCStr(task,"NEW_X")) { // Go to the next iteration and call the iterative callback // routine. iter++; //printf("iter %d\n", iter); // If we've reached the maximum number of iterations, terminate // the optimization. if (iter == maxiter) { callLBFGS("STOP"); break; } } else if (strIsEqualToCStr(task,"CONV")) break; else if (strIsEqualToCStr(task,"ABNO")) { status = abnormalTermination; break; } else if (strIsEqualToCStr(task,"ERROR")) { status = errorOnInput; break; } // Call L-BFGS again. callLBFGS(); } return status; }
// evaluate the gradient of f(x) void Solver::computeGradient(double *x) { // central difference formula double h = 1e-8; memcpy(x_tmp1, x, sizeof(double)*n); memcpy(x_tmp2, x, sizeof(double)*n); int i; for(i = 0; i < n; i++) { x_tmp1[i] += h; x_tmp2[i] -= h; g[i] = (computeObjective(x_tmp1) - computeObjective(x_tmp2)) / (2*h); x_tmp1[i] = x[i]; x_tmp2[i] = x[i]; } /* // forward difference formula double h = 1e-8; memcpy(x_tmp1, x, sizeof(double)*n); unsigned int i; for(i = 0; i < n; i++) { x_tmp1[i] += h; g[i] = (computeObjective(x_tmp1) - *f) / (h); x_tmp1[i] = x[i]; } */ }
/// Evaluates the objective, optionally writing through the buffer `g`.
///
/// @param xi  If non-null, passed to `prepareData(xi)` before evaluation;
///            otherwise the argument-less `prepareData()` is used.
/// @param g   If non-null, the buffer is wrapped as a
///            `trajectory.N()` x `trajectory.M()` `MatMap` and handed to
///            `computeObjective` (presumably to receive the gradient --
///            confirm against `computeObjective`). If null, an empty 0x0
///            `MatX` is passed instead.
/// @return The value returned by `computeObjective`.
///
/// The whole call is bracketed by the "gradient" timer.
double ProblemDescription::evaluateObjective(const double* xi, double* g)
{
    TIMER_START( "gradient" );

    if ( !xi ) {
        prepareData();
    } else {
        prepareData( xi );
    }

    const double value = g
        ? computeObjective( MatMap( g, trajectory.N(), trajectory.M() ) )
        : computeObjective( MatX( 0, 0 ) );

    TIMER_STOP( "gradient" );
    return value;
}
size_t SmoothDualDecompositionFistaDescent::run(double rho) { //double rho_end = rho; //rho = 10; // step length double omega; omega = _lipschitz_constant_optimistic; // objectives double obj_lambda, obj_y_old; obj_y_old = -std::numeric_limits<double>::max(); // gradient Eigen::VectorXd gradient; double gnorm = std::numeric_limits<double>::max(); //// TODO: should possibly consider pruning all the labels that have an //// inf in theta, but this would get quite messy! Could use the property //// if a var is once inf it stays inf in the whole LPQP framework. //// TODO: also investigate how much slower the computations get because //// of the infs! //size_t num_inf = 0; //for (size_t v_idx=0; v_idx<_num_vertices; v_idx++) { // for (size_t k=0; k<_num_states[v_idx]; k++) { // if (std::isinf(_theta_unary[v_idx][k])) { // num_inf++; // } // } //} //std::cout << "num_inf: " << num_inf << std::endl; size_t iter = 0; while(!stoppingCriteriaMet(iter, gnorm)){ //// TODO: play around with this! Seems to work, but we need to improve this! //if ((iter % 10) == 0) { // printf("rho before: %f.\n", rho); // rho = adaptRho(_lambda, rho, rho_end, 1e-1); // printf("changed rho: %f.\n", rho); // obj_y_old = computeObjective(rho, _y); //} _y_old = _y; double obj_new; obj_new = gradientStep(rho, _lambda, obj_lambda, gradient, gnorm, _u, omega); printProgress(iter, obj_new, gnorm, 1.0/omega); if (obj_new > obj_y_old) { _y = _u; } else { _y = _y_old; } double theta = 2/(static_cast<double>(iter)+2.0); _v = _y_old+1.0/theta*(_u-_y_old); theta = 2/(static_cast<double>(iter)+3.0); _lambda = (1-theta)*_y+theta*_v; obj_y_old = obj_new; iter++; } // _y is the final solution _lambda = _y; // due to the line search we have to call inference // again before returning to actually get the latest marginals of the // final value of lambda! obj_lambda = computeObjective(rho, _lambda); setMarginalsUnary(); setMarginalsPair(); return iter; }