/** \brief Finite-difference gradient check.

    This function computes a sequence of one-sided finite-difference checks
    for the gradient. At each step of the sequence, the finite-difference
    step size is decreased. If the approximation is first order, the output
    reports the error
    \f[
      \left| \frac{f(x+td) - f(x)}{t}
             - \langle \nabla f(x), d \rangle_{\mathcal{X}^*,\mathcal{X}} \right|.
    \f]
    More generally, the difference approximation is
    \f[
      \frac{1}{t} \sum\limits_{i=1}^m w_i f(x + t c_i d),
    \f]
    where \f$m = \text{order}+1\f$, the \f$w_i\f$ are the difference weights,
    and the \f$c_i\f$ are the difference steps.

    @param[in]  x             is an optimization variable.
    @param[in]  d             is a direction vector.
    @param[in]  printToStream is a flag that turns output on/off.
    @param[out] outStream     is the output stream.
    @param[in]  numSteps      is the number of finite-difference steps.
    @param[in]  order         is the order of the finite-difference
                              approximation (1, 2, 3, or 4).
*/
virtual std::vector<std::vector<Real> > checkGradient( const Vector<Real> &x,
                                                       const Vector<Real> &d,
                                                       const bool printToStream = true,
                                                       std::ostream & outStream = std::cout,
                                                       const int numSteps = ROL_NUM_CHECKDERIV_STEPS,
                                                       const int order = 1 ) {
  return checkGradient(x, x.dual(), d, printToStream, outStream, numSteps, order);
}
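/* For illustration, standard one-sided stencils of this form are (these are
   textbook values shown as an assumption for clarity; the weights ROL
   actually uses are defined in the implementation):
     order 1: c = {0, 1},        w = {-1,    1}
     order 2: c = {0, 1, 2},     w = {-3/2,  2, -1/2}
     order 3: c = {0, 1, 2, 3},  w = {-11/6, 3, -3/2, 1/3}
*/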
/** \brief Finite-difference gradient check with specified step sizes.

    This function computes a sequence of one-sided finite-difference checks
    for the gradient, using the user-supplied sequence of step sizes. If the
    approximation is first order, the output reports the error
    \f[
      \left| \frac{f(x+td) - f(x)}{t}
             - \langle \nabla f(x), d \rangle_{\mathcal{X}^*,\mathcal{X}} \right|.
    \f]
    More generally, the difference approximation is
    \f[
      \frac{1}{t} \sum\limits_{i=1}^m w_i f(x + t c_i d),
    \f]
    where \f$m = \text{order}+1\f$, the \f$w_i\f$ are the difference weights,
    and the \f$c_i\f$ are the difference steps.

    @param[in]  x             is an optimization variable.
    @param[in]  d             is a direction vector.
    @param[in]  steps         is a vector of user-specified step sizes.
    @param[in]  printToStream is a flag that turns output on/off.
    @param[out] outStream     is the output stream.
    @param[in]  order         is the order of the finite-difference
                              approximation (1, 2, 3, or 4).
*/
virtual std::vector<std::vector<Real> > checkGradient( const Vector<Real> &x,
                                                       const Vector<Real> &d,
                                                       const std::vector<Real> &steps,
                                                       const bool printToStream = true,
                                                       std::ostream & outStream = std::cout,
                                                       const int order = 1 ) {
  return checkGradient(x, x.dual(), d, steps, printToStream, outStream, order);
}
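// The standalone sketch below (a separate program, not part of this header)
// reproduces the first-order check with plain doubles in place of ROL
// vectors. Everything in it, including the test function and directionalFD,
// is an illustrative assumption rather than part of the ROL API.

#include <cmath>
#include <cstdio>
#include <functional>
#include <vector>

// One-sided difference quotient (f(x + t*d) - f(x)) / t along direction d.
static double directionalFD(const std::function<double(const std::vector<double>&)>& f,
                            const std::vector<double>& x,
                            const std::vector<double>& d,
                            double t) {
  std::vector<double> xt(x);
  for (std::size_t i = 0; i < x.size(); ++i) xt[i] += t * d[i];
  return (f(xt) - f(x)) / t;
}

int main() {
  // f(x) = x0^2 + 3*x1, so <grad f(x), d> = 2*x0*d0 + 3*d1.
  auto f = [](const std::vector<double>& v) { return v[0] * v[0] + 3.0 * v[1]; };
  const std::vector<double> x = {1.0, 2.0}, d = {1.0, 1.0};
  const double exact = 2.0 * x[0] * d[0] + 3.0 * d[1];
  // The error should shrink linearly in t for this first-order check.
  for (double t = 1.0e-1; t > 1.0e-8; t *= 1.0e-1)
    std::printf("t = %8.1e  error = %12.4e\n", t,
                std::fabs(directionalFD(f, x, d, t) - exact));
  return 0;
}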
/**
 * Find the parameter values that minimize the objective function,
 * using the scaled conjugate gradient (SCG) method.
 */
void SCGModelTrainer::Train(int numIterations)
{
  double sigma0  = 1.0e-4;
  double beta    = 1.0;
  double betaMin = 1.0e-15;
  double betaMax = 1.0e100;
  double kappa   = 0.0;
  double mu      = 0.0;
  double sigma;
  double theta   = 0.0;
  double delta;                       // scaled curvature estimate
  double alpha, fOld, fNew, fNow;
  double Delta;                       // ratio of actual to predicted decrease
  double EPS = arma::math::eps();     // machine epsilon

  vec direction, gradNew, gradOld, gPlus, xPlus, xNew;
  vec x = getParameters();
  int numParams, numSuccess;
  bool success;

  fOld = errorFunction(x);
  fNow = fOld;
  gradNew = errorGradients(x);
  gradOld = gradNew;
  direction = -gradNew;
  numParams  = gradNew.size();
  success    = true;
  numSuccess = 0;

  if (gradientCheck)
  {
    checkGradient();
  }

  // Main loop
  for (int j = 1; j <= numIterations; j++)
  {
    if (success)
    {
      mu = dot(direction, gradNew);
      if (mu >= 0.0)
      {
        direction = -gradNew;
        mu = dot(direction, gradNew);
      }
      kappa = dot(direction, direction);

      // If the search direction is numerically zero, stop.
      if (kappa < EPS)
      {
        functionValue = fNow;
        setParameters(x);
        return;
      }

      // Hessian-free curvature estimate: theta approximates d'Hd
      // using two gradient evaluations.
      sigma = sigma0 / sqrt(kappa);
      xPlus = x + (sigma * direction);
      gPlus = errorGradients(xPlus);
      theta = dot(direction, gPlus - gradNew) / sigma;
    }

    // Increase the effective curvature and evaluate the step size alpha.
    delta = theta + (beta * kappa);
    if (delta <= 0.0)
    {
      delta = beta * kappa;
      beta  = beta - (theta / kappa);
      // Alternative update:
      // double oldDelta = delta;
      // double oldBeta  = beta;
      // beta  = 2.0 * (oldBeta - oldDelta / kappa);
      // delta = oldBeta * kappa - oldDelta;
    }
    alpha = -(mu / delta);

    // Calculate the comparison ratio.
    xNew = x + (alpha * direction);
    fNew = errorFunction(xNew);
    Delta = 2.0 * (fNew - fOld) / (alpha * mu);
    if (Delta >= 0.0)
    {
      success = true;
      numSuccess++;
      x = xNew;
      // RB: Do we need to set parameters here?
      setParameters(x);
      fNow = fNew;
    }
    else
    {
      success = false;
      fNow = fOld;
    }

    if (display)
    {
      Rprintf("Cycle %d Error %f Scale %f\n", j, fNow, beta);
    }

    if (success)
    {
      // Terminate when both the largest absolute parameter change and the
      // error change fall below their tolerances.
      if ((max(abs(alpha * direction)) < parameterTolerance) &&
          (std::abs(fNew - fOld) < errorTolerance))
      {
        functionValue = fNew;
        // setParameters(x);
        return;
      }
      else
      {
        // Update variables for the new position.
        fOld = fNew;
        gradOld = gradNew;
        gradNew = errorGradients(x);

        // If the gradient is numerically zero, stop.
        if (dot(gradNew, gradNew) < 1e-16)
        {
          functionValue = fNew;
          // setParameters(x);
          return;
        }
      }
    }

    // Adjust beta according to the comparison ratio.
    if (Delta < 0.25)
    {
      beta = std::min(4.0 * beta, betaMax);
    }
    if (Delta > 0.75)
    {
      beta = std::max(0.5 * beta, betaMin);
    }

    // Restart with the negative gradient after numParams successes;
    // otherwise update the search direction.
    if (numSuccess == numParams)
    {
      direction = -gradNew;
      numSuccess = 0;
    }
    else
    {
      if (success)
      {
        double gamma = dot(gradOld - gradNew, (gradNew / mu));
        direction = (gamma * direction) - gradNew;
      }
    }
  }

  // Maximum number of iterations reached without convergence.
  if (display)
  {
    Rprintf("Warning: Maximum number of iterations has been exceeded\n");
  }
  functionValue = fOld;
  // setParameters(x);

  // Check the last gradient (to make sure everything went fine).
  if (gradientCheck)
    checkGradient();
  return;
}
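// The standalone sketch below (a separate program, not part of this class)
// isolates the Hessian-free curvature estimate used in the loop above:
// theta = d'(g(x + sigma*d) - g(x)) / sigma approximates d'Hd from two
// gradient evaluations. Plain std::vector<double> stands in for arma::vec,
// and the quadratic test function is an illustrative assumption.

#include <cmath>
#include <cstdio>
#include <vector>

// Gradient of f(x) = x0^2 + 2*x1^2, so the Hessian is H = diag(2, 4).
static std::vector<double> grad(const std::vector<double>& x) {
  return {2.0 * x[0], 4.0 * x[1]};
}

int main() {
  const std::vector<double> x = {1.0, -1.0}, d = {1.0, 2.0};
  const double sigma0 = 1.0e-4;
  const double kappa  = d[0] * d[0] + d[1] * d[1];   // ||d||^2
  const double sigma  = sigma0 / std::sqrt(kappa);

  // Two gradient evaluations replace an explicit Hessian-vector product.
  const std::vector<double> g     = grad(x);
  const std::vector<double> gPlus = grad({x[0] + sigma * d[0],
                                          x[1] + sigma * d[1]});
  const double theta = ((gPlus[0] - g[0]) * d[0] +
                        (gPlus[1] - g[1]) * d[1]) / sigma;

  // For a quadratic the estimate is exact: d'Hd = 2*1*1 + 4*2*2 = 18.
  std::printf("theta = %.6f (exact d'Hd = 18)\n", theta);
  return 0;
}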