// Evaluates the L1-penalized objective at x.
//
// The unpenalized value and its gradient come from FunctionGradient; the
// gradient is written into `grad` and is NOT modified by the penalty term.
//
//   C     weight applied to each |x[i]|
//   x     point at which to evaluate
//   grad  output: receives whatever FunctionGradient writes for x
//
// Returns FunctionGradient(x) + C * sum_i |x[i]|.
double ME_Model::regularized_func_grad(const double C, const Vec & x, Vec & grad)
{
  double total = FunctionGradient(x.STLVec(), grad.STLVec());

  // Fold the penalty in one coordinate at a time, in index order.
  const size_t n = x.Size();
  size_t k = 0;
  while (k < n) {
    total += C * fabs(x[k]);
    ++k;
  }

  return total;
}
double ME_Model::backtracking_line_search( const Vec & x0, const Vec & grad0, const double f0, const Vec & dx, Vec & x, Vec & grad1) { double t = 1.0 / LINE_SEARCH_BETA; double f; do { t *= LINE_SEARCH_BETA; x = x0 + t * dx; f = FunctionGradient(x.STLVec(), grad1.STLVec()); // cout << "*"; } while (f > f0 + LINE_SEARCH_ALPHA * t * dot_product(dx, grad0)); return f; }
// Minimizes the objective evaluated by FunctionGradient using a
// limited-memory quasi-Newton iteration, starting from x0.
//
// Each iteration prints a progress line (and held-out statistics when
// _nheldout > 0), stops early once the gradient norm drops below
// MIN_GRAD_NORM, and otherwise takes a step chosen by
// backtracking_line_search along the direction -approximate_Hg(...).
// At most LBFGS_MAX_ITER iterations are run.
//
// Returns the final point as a std::vector<double>.
vector<double> ME_Model::perform_LBFGS(const vector<double> & x0)
{
  const size_t dim = x0.size();

  Vec point = x0;
  Vec gradient(dim);
  Vec direction(dim);
  double objective = FunctionGradient(point.STLVec(), gradient.STLVec());

  // Ring buffers holding the last M updates: point differences, gradient
  // differences, and rho = 1 / (grad_diff . point_diff).
  Vec point_diffs[M];
  Vec grad_diffs[M];
  double rho[M];

  for (int iter = 0; iter < LBFGS_MAX_ITER; iter++) {
    // Progress report; the sign of the objective is flipped for display.
    Rprintf("%3d  obj(err) = %f (%6.4f)", iter+1, -objective, _train_error);
    if (_nheldout > 0) {
      // heldout_likelihood() is called before _heldout_error is read,
      // matching the order the values are needed for the message.
      const double heldout_logl = heldout_likelihood();
      Rprintf("  heldout_logl(err) = %f (%6.4f)", heldout_logl, _heldout_error);
    }
    Rprintf("\n");

    const double gradient_norm = sqrt(dot_product(gradient, gradient));
    if (gradient_norm < MIN_GRAD_NORM) {
      break;
    }

    direction = -1 * approximate_Hg(iter, gradient, point_diffs, grad_diffs, rho);

    Vec next_point(dim);
    Vec next_gradient(dim);
    objective = backtracking_line_search(point, gradient, objective, direction,
                                         next_point, next_gradient);

    // Overwrite the oldest history entry with this step's update.
    // NOTE(review): rho becomes inf/NaN if the two differences are
    // orthogonal or the step is zero -- confirm approximate_Hg tolerates it.
    const int slot = iter % M;
    point_diffs[slot] = next_point - point;
    grad_diffs[slot] = next_gradient - gradient;
    rho[slot] = 1.0 / dot_product(grad_diffs[slot], point_diffs[slot]);

    point = next_point;
    gradient = next_gradient;
  }

  return point.STLVec();
}
// Minimizes the L1-regularized objective (regularized_func_grad with weight
// C) using a limited-memory quasi-Newton iteration driven by the
// pseudo-gradient, starting from x0.
//
// Each iteration:
//   1. computes the pseudo-gradient pg = pseudo_gradient(x, grad, C);
//   2. prints a progress line (plus held-out statistics when _nheldout > 0);
//   3. stops if ||pg|| < MIN_GRAD_NORM;
//   4. builds a search direction from approximate_Hg and, if it is not a
//      descent direction with respect to pg, calls dx.Project(-pg);
//   5. picks the next point with constrained_line_search;
//   6. records the update in the M-entry history.
// At most OWLQN_MAX_ITER iterations are run.
//
//   x0  starting point
//   C   L1 regularization weight
//
// Returns the final point as a std::vector<double>.
vector<double> ME_Model::perform_OWLQN(const vector<double> & x0, const double C)
{
  const size_t dim = x0.size();
  Vec x = x0;
  Vec grad(dim), dx(dim);
  double f = regularized_func_grad(C, x, grad);

  // Ring buffers holding the last M updates: point differences (s),
  // gradient differences (y), and rho = 1 / (y . s) (z).
  Vec s[M], y[M];
  double z[M];

  for (int iter = 0; iter < OWLQN_MAX_ITER; iter++) {
    Vec pg = pseudo_gradient(x, grad, C);

    // Progress goes through Rprintf, as in perform_LBFGS, so that output is
    // routed via the R console instead of being written to stderr directly.
    Rprintf("%3d  obj(err) = %f (%6.4f)", iter+1, -f, _train_error);
    if (_nheldout > 0) {
      const double heldout_logl = heldout_likelihood();
      Rprintf("  heldout_logl(err) = %f (%6.4f)", heldout_logl, _heldout_error);
    }
    Rprintf("\n");

    if (sqrt(dot_product(pg, pg)) < MIN_GRAD_NORM) break;

    dx = -1 * approximate_Hg(iter, pg, s, y, z);
    // A non-negative inner product with pg means dx is not a descent
    // direction for the pseudo-gradient; fall back to projecting onto -pg.
    if (dot_product(dx, pg) >= 0)
      dx.Project(-1 * pg);

    Vec x1(dim), grad1(dim);
    f = constrained_line_search(C, x, pg, f, dx, x1, grad1);

    // Overwrite the oldest history entry. The gradient difference uses the
    // plain gradients (grad1, grad), not the pseudo-gradient.
    const int slot = iter % M;
    s[slot] = x1 - x;
    y[slot] = grad1 - grad;
    z[slot] = 1.0 / dot_product(y[slot], s[slot]);

    x = x1;
    grad = grad1;
  }

  return x.STLVec();
}