Example #1
  /** \brief Finite-difference gradient check.

      This function computes a sequence of one-sided finite-difference checks for the gradient.
      At each step of the sequence, the finite-difference step size is decreased.  If the
      approximation is first order, the output reports the error
      \f[
          \left| \frac{f(x+td) - f(x)}{t} - \langle \nabla f(x),d\rangle_{\mathcal{X}^*,\mathcal{X}}\right|
      \f]
      for each step size \f$t\f$.  More generally, the difference approximation is
      \f[
          \frac{1}{t} \sum\limits_{i=1}^m w_i f(x + t c_i d),
      \f]
      where \f$m = \text{order}+1\f$, the \f$w_i\f$ are the difference weights, and the
      \f$c_i\f$ are the difference steps.  For example, order 1 corresponds to \f$m=2\f$,
      \f$(w_1,w_2)=(-1,1)\f$, and \f$(c_1,c_2)=(0,1)\f$, which recovers the one-sided
      difference above.
      @param[in]      x             is an optimization variable.
      @param[in]      d             is a direction vector.
      @param[in]      printToStream is a flag that turns on/off output.
      @param[out]     outStream     is the output stream.
      @param[in]      numSteps      is the number of finite-difference steps.
      @param[in]      order         is the order of the finite-difference approximation (1, 2, 3, or 4).
  */
  virtual std::vector<std::vector<Real> > checkGradient( const Vector<Real> &x,
                                                         const Vector<Real> &d,
                                                         const bool printToStream = true,
                                                         std::ostream & outStream = std::cout,
                                                         const int numSteps = ROL_NUM_CHECKDERIV_STEPS,
                                                         const int order = 1 ) {
    return checkGradient(x, x.dual(), d, printToStream, outStream, numSteps, order);
  }
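A minimal usage sketch, hedged: QuadObjective below is illustrative, and the ROL::makePtr / ROL::StdVector helpers are assumed from recent Trilinos releases, so exact headers and signatures may differ in your version.

#include <iostream>
#include <vector>
#include "ROL_StdVector.hpp"
#include "ROL_Objective.hpp"

// Illustrative objective f(x) = 0.5 <x, x>, so grad f(x) = x.
class QuadObjective : public ROL::Objective<double> {
public:
  double value( const ROL::Vector<double> &x, double &tol ) override {
    return 0.5 * x.dot(x);
  }
  void gradient( ROL::Vector<double> &g, const ROL::Vector<double> &x,
                 double &tol ) override {
    g.set(x);
  }
};

int main() {
  auto xp = ROL::makePtr<std::vector<double>>(5, 1.0);
  auto dp = ROL::makePtr<std::vector<double>>(5, 0.5);
  ROL::StdVector<double> x(xp), d(dp);
  QuadObjective obj;
  // Prints one row per step size t: the FD value, <grad f(x), d>, and their error.
  std::vector<std::vector<double>> results = obj.checkGradient(x, d);
  return 0;
}

For this quadratic, the first-order error should shrink roughly linearly in t until rounding error dominates.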
Example #2
  /** \brief Finite-difference gradient check with specified step sizes.

      This function computes a sequence of one-sided finite-difference checks for the gradient.
      At each step of the sequence, the finite-difference step size is decreased.  If the
      approximation is first order, the output reports the error
      \f[
          \left| \frac{f(x+td) - f(x)}{t} - \langle \nabla f(x),d\rangle_{\mathcal{X}^*,\mathcal{X}}\right|
      \f]
      for each step size \f$t\f$.  More generally, the difference approximation is
      \f[
          \frac{1}{t} \sum\limits_{i=1}^m w_i f(x + t c_i d),
      \f]
      where \f$m = \text{order}+1\f$, the \f$w_i\f$ are the difference weights, and the
      \f$c_i\f$ are the difference steps.  For example, order 1 corresponds to \f$m=2\f$,
      \f$(w_1,w_2)=(-1,1)\f$, and \f$(c_1,c_2)=(0,1)\f$, which recovers the one-sided
      difference above.
      @param[in]      x             is an optimization variable.
      @param[in]      d             is a direction vector.
      @param[in]      steps         is a vector of user-specified finite-difference step sizes.
      @param[in]      printToStream is a flag that turns on/off output.
      @param[out]     outStream     is the output stream.
      @param[in]      order         is the order of the finite-difference approximation (1, 2, 3, or 4).
  */
  virtual std::vector<std::vector<Real> > checkGradient( const Vector<Real> &x,
                                                         const Vector<Real> &d,
                                                         const std::vector<Real> &steps,
                                                         const bool printToStream = true,
                                                         std::ostream & outStream = std::cout,
                                                         const int order = 1 ) {

    return checkGradient(x, x.dual(), d, steps, printToStream, outStream, order);

  }
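Continuing the same hypothetical setup from Example #1, this overload lets the caller replace the automatic step sequence with explicit step sizes:

// User-chosen steps instead of the default geometric sequence; order-2 differences.
std::vector<double> steps = { 1e-1, 1e-2, 1e-4, 1e-8 };
std::vector<std::vector<double>> results =
    obj.checkGradient(x, d, steps, true, std::cout, 2);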
Example #3
/**
 * Train the model with the scaled conjugate gradient (SCG) algorithm:
 * iteratively adjust the parameter vector to minimize the objective
 * (error) function, running at most numIterations iterations.
 */
void SCGModelTrainer::Train(int numIterations)
{
	double sigma0 = 1.0e-4;  // base step for the curvature estimate
	double beta = 1.0;       // Hessian scale (regularization) parameter
	double betaMin = 1.0e-15;
	double betaMax = 1.0e100;
	double kappa = 0.0;      // squared norm of the search direction
	double mu = 0.0;         // slope of the error along the search direction
	double sigma;
	double theta = 0.0;      // curvature estimate along the search direction
	double delta;            // scaled curvature (denominator of the step length)
	double alpha, fOld, fNew, fNow;
	double Delta;            // comparison ratio: actual vs. predicted decrease
	double EPS = arma::math::eps();
    
	vec direction, gradNew, gradOld, gPlus, xPlus, xNew;
	vec x = getParameters();
				
	int numParams, numSuccess;
	bool success;
		
	fOld = errorFunction(x);
	fNow = fOld;

	gradNew = errorGradients(x);
	gradOld = gradNew;

	direction = -gradNew;

	numParams = gradNew.size();
	success = true;
	numSuccess = 0;
		
	if(gradientCheck)
	{
		checkGradient();
	}
		
	// Main loop
	for (int j = 1; j <= numIterations; j++ )
	{
		if (success)
		{
			mu = dot(direction, gradNew);
			// Restart along the negative gradient if this is not a descent direction.
			if ( mu >= 0.0 )
			{
				direction = -gradNew;
				mu = dot(direction, gradNew);
			}
				
			kappa = dot(direction, direction);

			// The search direction is numerically zero: treat as converged.
			if(kappa < EPS)
			{
				functionValue = fNow;
				setParameters(x);
				return;
			}
			sigma = sigma0 / sqrt(kappa);
			xPlus = x + (sigma * direction);
			gPlus = errorGradients(xPlus);
			// Finite-difference estimate of the curvature d'Hd along the direction.
			theta = dot(direction, gPlus - gradNew) / sigma;
		}
		
		// Scaled curvature of the local quadratic model.
		delta = theta + (beta * kappa);
			
		// If the scaled Hessian is not positive definite, raise the scale beta.
		if ( delta <= 0.0 )
		{
			delta = beta * kappa;
			beta = beta - ( theta / kappa );
			//double olddelta = delta;
			//double oldbeta = beta;
			//beta = 2.0*(oldbeta - olddelta/kappa);
			//delta = oldbeta*kappa - olddelta;
		}
		// Step length: minimizer of the local quadratic model along the direction.
		alpha = - ( mu / delta );
		
		xNew = x + (alpha * direction);
		fNew = errorFunction(xNew);

		// Comparison ratio of the actual to the predicted error decrease.
		Delta = 2.0 * ( fNew - fOld ) / (alpha * mu);
		if ( Delta >= 0.0 )
		{
			success = true;
			numSuccess++;
			x = xNew;
			
			// RB: Do we need to set parameters here?
			setParameters(x);
			fNow = fNew;
		}
		else
		{
			success = false;
			fNow = fOld;
		}
			
		if(display)
		{
			Rprintf("Cycle %d   Error %f  Scale %f\n", j, fNow, beta);
		}

		if(success)
		{
			// Converged when both the parameter update and the change in error are small.
			if ( (max(abs(alpha * direction)) < parameterTolerance) && (fabs(fNew - fOld) < errorTolerance) )
			{
				functionValue = fNew;
				// setParameters(x); 
				return;
			}
			else
			{
				fOld = fNew;
				gradOld = gradNew;
				gradNew = errorGradients(x);
				
				if(dot(gradNew, gradNew) < 1e-16)
				{
					functionValue = fNew;
					// setParameters(x);
					return;
				}
			}
		}
			
		// Adapt the scale: grow beta when the quadratic fit is poor, shrink it when good.
		if ( Delta < 0.25 )
		{
			beta = min( 4.0 * beta, betaMax );
		}
			
		if ( Delta > 0.75 )
		{
			beta = max( 0.5 * beta, betaMin );
		}
			
		// Restart along the negative gradient after numParams successful steps;
		// otherwise apply the conjugate direction update.
		if ( numSuccess == numParams )
		{
			direction = -gradNew;
			numSuccess = 0;
		}
		else
		{
			if (success)
			{
				double gamma = dot(gradOld - gradNew, (gradNew / mu));
				direction = (gamma * direction) - gradNew;
			}
		}
	}
		
	if(display)
	{
		Rprintf("Warning: Maximum number of iterations has been exceeded\n");
	}

	functionValue = fOld;
	// setParameters(x);
	
	// Check the last gradient (to make sure everything went fine).
	if (gradientCheck) checkGradient();
	
	return;
}
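For context, a hedged usage sketch of the trainer: only Train() and the members it calls appear in the snippet above, so the header name, the constructor, and the Model type below are assumptions for illustration.

#include "SCGModelTrainer.h"      // assumed header name

void fitModel(Model &model)       // Model: stand-in for a gptk model type
{
	SCGModelTrainer trainer(model);   // assumed constructor taking the model
	trainer.Train(200);               // at most 200 SCG iterations; stops early
	                                  // on the parameter/error tolerances above
}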