Example #1
double lev_marq (
   int maxits ,           // Iteration limit
   double critlim ,       // Quit if crit drops this low
   double tol ,           // Convergence tolerance
   double (*criter) (double * , double * , double * ) , // Criterion func
   int nvars ,            // Number of variables
   double *x ,            // In/out vector of the independent variables
   SingularValueDecomp *sptr , // Work object
   double *grad ,         // Work vector n long
   double *delta ,        // Work vector n long
   double *hessian ,      // Work vector n*n long
   int progress           // Print progress?
   )
{
   int i, iter, bad_cnt, trivial_cnt, reset_ab ;
   double error, maxgrad, lambda ;
   double prev_err, improvement ;
   char msg[84] ;
   int prog_cnt=0 ;
                     
/*
   Compute the error, Hessian, and error gradient at the starting point.
*/

   error = criter ( x , hessian , grad ) ;
   prev_err = error ;  // Will be 'previous iteration' error
   reset_ab = 1 ;      // Flag to use most recent good Hessian and gradient

/*
   Every time an iteration results in increased error, increment bad_cnt
   so that remedial action can be taken or the loop abandoned entirely.
   Do the same via trivial_cnt for improvements that are trivially small.
*/

   bad_cnt = 0 ;       // Counts bad iterations for restart or exit
   trivial_cnt = 0 ;   // Counts trivial improvements for restart or exit

/*
   Initialize lambda to slightly exceed the largest diagonal element
   of the Hessian.
*/

   lambda = 0.0 ;
   for (i=0 ; i<nvars ; i++) {
      if (hessian[i*nvars+i] > lambda)
         lambda = hessian[i*nvars+i] ;
      }

   lambda += 1.e-20 ;

/*
   Main iteration loop is here
*/

   iter = 0 ;
   for (;;) {  // Each iter is an epoch

#if DEBUG
      printf ( "\nLM iter %d  lambda=%lf  err=%lf", iter, lambda, error ) ;
#endif

      if ((maxits > 0)  &&  (iter++ >= maxits))
         break ;

/*
   Abort if the user pressed ESCape.  Otherwise check the current error
   against the user's limits.
*/

      if (user_pressed_escape()) { // Was a key pressed?
         prev_err = -prev_err ;    // Flags user that ESCape was pressed
         break ;
         }

      if (error <= critlim)  // If our error is within user's limit
         break ;             // then we are done!

      if (error <= tol)      // Good in case converging to zero
         break ;

      if (reset_ab) {        // Revert to latest good Hessian and gradient?
         memcpy ( sptr->a , hessian , nvars * nvars * sizeof(double) ) ;
         memcpy ( sptr->b , grad , nvars * sizeof(double) ) ;
         }

/*
   Add lambda times the identity matrix to the Hessian.
   Solve the linear system for the correction, add that correction to the
   current point, and compute the error, Hessian, and gradient there.
*/

      for (i=0 ; i<nvars ; i++)  // Shift diagonal for stability
         sptr->a[i*nvars+i] += lambda ;

      sptr->svdcmp () ;                  // Singular value decomposition
      sptr->backsub ( 1.e-8 , delta ) ;  // Back substitution solves system

      for (i=0 ; i<nvars ; i++)
         x[i] += delta[i] ;
      error = criter ( x , sptr->a , sptr->b ) ;

#if DEBUG
      printf ( "  new=%lf", error ) ;
#if DEBUG > 3
      printf ( "\n(Dhess grad): " ) ;
      for (i=0 ; i<nvars ; i++)
         printf ( " (%lf %lf)", sptr->a[i*nvars+i], sptr->b[i] ) ;
#endif
#endif

      if (prev_err < 1.0)
         improvement = prev_err - error ;
      else 
         improvement = (prev_err - error) / prev_err ;

      if (improvement > 0.0) {
#if DEBUG
         printf ( "   GOOD = %lf%%", 100.0 * improvement ) ;
#endif

/*
   This correction resulted in improvement.  If only a trivial amount,
   check the gradient (relative to the error).  If also small, quit.
   Otherwise count these trivial improvements.  If there were a few,
   the Hessian may be bad, so retreat toward steepest descent.  If there
   were a lot, give up.
*/

         prev_err = error ;           // Keep best error here
         if (improvement < tol) {
            maxgrad = 0.0 ;
            for (i=0 ; i<nvars ; i++) {
               if (fabs ( sptr->b[i] )  >  maxgrad)
                  maxgrad = fabs ( sptr->b[i] ) ;
               }
            if (error > 1.0)
               maxgrad /= error ;
#if DEBUG
            printf ( "   Triv=%d  mg=%lf", trivial_cnt, maxgrad ) ;
#endif
            if (maxgrad <= tol)
               break ;

            if (trivial_cnt++ == 4) {
               for (i=0 ; i<nvars ; i++) {
                  if (hessian[i*nvars+i] > lambda)
                     lambda = hessian[i*nvars+i] ;
                  }
               }
            else if (trivial_cnt == 10)  // Normal escape from loop
               break ;
            }
         else
            trivial_cnt = 0 ; // Reset counter whenever good improvement

/*
   Since this step was good, update everything: the Hessian, the gradient,
   and the 'previous iteration' error.  Zero reset_ab so that we do not
   waste time copying the Hessian and gradient into sptr, as they are
   already there.  Cut lambda so that we approach Newton's method.
*/

         memcpy ( hessian , sptr->a , nvars * nvars * sizeof(double) ) ;
         memcpy ( grad , sptr->b , nvars * sizeof(double) ) ;
         reset_ab = 0 ;
         bad_cnt = 0 ;
         lambda *= 0.5 ;
         }

      else {
#if DEBUG
         printf ( "   BAD=%d", bad_cnt ) ;
#endif

/*
   This step caused an increase in error, so undo the step and set reset_ab
   to cause the previous Hessian and gradient to be used.  Increase lambda
   to revert closer to steepest descent (slower but more stable).
   If we had several bad iterations in a row, the Hessian may be bad, so
   raise lambda to at least the largest diagonal element of the Hessian.
   In the very unlikely event that many bad iterations happen in a row, quit.
*/

         for (i=0 ; i<nvars ; i++)
            x[i] -= delta[i] ;
         reset_ab = 1 ;                   // Fetch old Hessian and gradient
         lambda *= 2.0 ;                  // Less Newton
         if (bad_cnt++ == 4) {            // If several bad in a row
            for (i=0 ; i<nvars ; i++) {   // Make sure very un-Newton
               if (hessian[i*nvars+i] > lambda)
                  lambda = hessian[i*nvars+i] ;
               }
            }
         if (bad_cnt == 10)  // Pathological escape from loop
            break ;          // Should almost never happen
         }

/*
   Diagnostic code
*/

      if (++prog_cnt >= 1000 / nvars) {
         prog_cnt = 0 ;
         sprintf ( msg , "   LM error = %lf  lambda = %lf", prev_err, lambda ) ;
         if (progress)
            write_progress ( msg ) ;
         else 
            write_non_progress ( msg ) ;
         }
      }  // This is the end of the main iteration loop

#if DEBUG
   printf ( "\n\aLM Done=%lf  Press space...", error ) ;
   while (kbhit())
      getch() ;
   getch() ;
#endif

   return prev_err ;  // This is the best error
}
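
/*
   The following is a minimal sketch (not part of the original source) of a
   criterion function matching the pointer that lev_marq expects.  It fits
   the model y = p0 * exp(p1 * t) to a small data set by least squares.
   The convention assumed here is that 'grad' receives the right-hand side
   of the LM linear system, J'r with r = observed - predicted, and that
   'hessian' receives the Gauss-Newton approximation J'J (the factors of two
   in the true gradient and Hessian of the summed squared error cancel in
   the solved step).  The data arrays and all names are hypothetical.
*/

#include <math.h>

#define EX_NPTS  50
#define EX_NVARS  2

static double tdata[EX_NPTS], ydata[EX_NPTS] ;  // Hypothetical data, filled elsewhere

static double example_criter ( double *x , double *hessian , double *grad )
{
   int i, j, k ;
   double pred, resid, err, deriv[EX_NVARS] ;

   for (i=0 ; i<EX_NVARS ; i++) {               // Zero the accumulators
      grad[i] = 0.0 ;
      for (j=0 ; j<EX_NVARS ; j++)
         hessian[i*EX_NVARS+j] = 0.0 ;
      }

   err = 0.0 ;
   for (k=0 ; k<EX_NPTS ; k++) {
      pred = x[0] * exp ( x[1] * tdata[k] ) ;   // Model prediction
      resid = ydata[k] - pred ;                 // Residual
      err += resid * resid ;
      deriv[0] = exp ( x[1] * tdata[k] ) ;      // d(pred) / d(x[0])
      deriv[1] = x[0] * tdata[k] * deriv[0] ;   // d(pred) / d(x[1])
      for (i=0 ; i<EX_NVARS ; i++) {
         grad[i] += deriv[i] * resid ;          // Accumulate J'r
         for (j=0 ; j<EX_NVARS ; j++)
            hessian[i*EX_NVARS+j] += deriv[i] * deriv[j] ; // Accumulate J'J
         }
      }

   return err ;            // Sum of squared residuals
}

/*
   lev_marq would then be handed this function pointer along with a
   SingularValueDecomp work object and the work vectors.  Construction of
   that object depends on the rest of the library, so it is omitted here.
*/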
Example #2
double dermin (
   int maxits ,           // Iteration limit
   double critlim ,       // Quit if crit drops this low
   double tol ,           // Convergence tolerance
   double (*criter) (double * , int , double * , double * ) , // Criterion func
   int n ,                // Number of variables
   double *x ,            // In/out vector of the independent variables
   double ystart ,        // Input of starting function value
   double *base ,         // Work vector n long
   double *direc ,        // Work vector n long
   double *g ,            // Work vector n long
   double *h ,            // Work vector n long
   double *deriv2 ,       // Work vector n long
   int progress           // Print progress?
   )
{
   int i, iter, user_quit, convergence_counter, poor_cj_counter ;
   double fval, fbest, high, scale, t1, t2, t3, y1, y2, y3, dlen, dot1, dot2 ;
   double prev_best, toler, gam, improvement ;
   char msg[400] ;

/*
   Initialize for the local univariate criterion which may be called by
   'glob_min' and 'brentmin' to minimize along the search direction.
*/


   local_x = x ;
   local_base = base ;
   local_direc = direc ;
   local_n = n ;
   local_criter = criter ;

/*
   Flag that the user has not pressed ESCape.
   Evaluate the function and, more importantly, its derivatives, at the
   starting point.  This call to criter puts the gradient into direc, but
   we flip its sign to get the downhill search direction.
   Also initialize the conjugate-gradient (CJ) algorithm by putting that
   vector in g and h.
*/

   user_quit = 0 ;
   fbest = criter ( x , 1 , direc , deriv2 ) ;
   prev_best = 1.e30 ;
   for (i=0 ; i<n ; i++)
      direc[i] = -direc[i] ;
   memcpy ( g , direc , n * sizeof(double) ) ;
   memcpy ( h , direc , n * sizeof(double) ) ;


#if DEBUG
   printf ( "\nDERMIN starting error = %lf", fbest ) ;
#endif

   if (fbest < 0.0) {   // If user pressed ESCape during criter call
      fbest = ystart ;
      user_quit = 1 ;
      goto FINISH ;
      }

/*
   Main loop.  For safety we impose a limit on iterations.
   There are two counters that have somewhat similar purposes.
   The first, convergence_counter, counts how many times in a row an
   iteration failed to reduce the function value by at least the user's
   tolerance.  We require several consecutive failures before termination.

   The second, poor_cj_counter, has a (generally) higher threshold.
   It keeps track of poor improvement and imposes successively smaller
   limits on gamma, forcing the algorithm back toward steepest descent
   if conjugate gradients (CJ) are doing poorly.
*/

   convergence_counter = 0 ;
   poor_cj_counter = 0 ;

   iter = 0 ;
   for (;;) {

      if ((maxits > 0)  &&  (iter++ >= maxits))
         break ;

      if (fbest < critlim)     // Do we satisfy user yet?
         break ;

/*
   Convergence check
*/

      if (prev_best <= 1.0)                  // If the function is small
         toler = tol ;                       // Work on absolutes
      else                                   // But if it is large
         toler = tol * prev_best ;           // Keep things relative

      if ((prev_best - fbest)  <=  toler) {  // If little improvement
         if (++convergence_counter >= 3)     // Then count how many
            break ;                          // And quit if too many
         }
      else                                   // But a good iteration
         convergence_counter = 0 ;           // Resets this counter

/*
   Does the user want to quit?
*/

      if ((user_quit = user_pressed_escape ()) != 0)
         break ;

/*
   Here we do a few quick things for housekeeping.
   We save the base for the linear search in 'base', which lets us
   parameterize from t=0.
   We find the greatest second derivative.  This makes an excellent
   scaling factor for the search direction so that the initial global
   search for a trio containing the minimum is fast.  Because this is so
   stable, we use it to bound the generally better but unstable Newton scale.
   We also compute the length of the search vector and its dot product with
   the gradient vector, as well as the directional second derivative.
   That lets us use a sort of Newton's method to help us scale the
   initial global search to be as fast as possible.  In the ideal case,
   the 't' parameter will be exactly equal to 'scale', the center point
   of the call to glob_min.
*/

      dot1 = dot2 = dlen = 0.0 ;        // For finding directional derivs
      high = 1.e-4 ;                    // For scaling glob_min
      for (i=0 ; i<n ; i++) {
         base[i] = x[i] ;               // We step out from here
         if (deriv2[i] > high)          // Keep track of second derivatives
            high = deriv2[i] ;          // For linear search via glob_min
         dot1 += direc[i] * g[i] ;      // Directional first derivative (neg)
         dot2 += direc[i] * direc[i] * deriv2[i] ; // and second
         dlen += direc[i] * direc[i] ;  // Length of search vector
         }

      dlen = sqrt ( dlen ) ;            // Actual length

#if DEBUG
      printf ( "\n(x d1 d2) d1=%lf d2=%lf len=%lf rat=%lf h=%lf:",
               dot1, dot2, dlen, dot1 / dot2, 1.5 / high ) ;
#endif

#if DEBUG > 1
      for (i=0 ; i<n ; i++)
         printf ( "( %lf %lf %lf)", x[i], direc[i], deriv2[i] ) ;
#endif


/*
   The search direction is in 'direc' and the maximum second derivative
   is in 'high'.  That stable value makes a good approximate scaling factor.
   The ideal Newton scaling factor is numerically unstable.
   So compute the Newton ideal, then bound it to be near the less ideal
   but far more stable maximum second derivative.
   Pass the first function value, corresponding to t=0, to the routine
   in *y2 and flag this by using a negative npts.
*/

      scale = dot1 / dot2 ;          // Newton's ideal but unstable scale
      high = 1.5 / high ;            // Less ideal but more stable heuristic
      if (high < 1.e-4)              // Subjectively keep it realistic
         high = 1.e-4 ;

      if (scale < 0.0)               // This is truly pathological
         scale = high ;              // So stick with old reliable
      else if (scale < 0.1 * high)   // Bound the Newton scale
         scale = 0.1 * high ;        // To be close to the stable scale
      else if (scale > 10.0 * high)  // Bound it both above and below
         scale = 10.0 * high ;

      y2 = prev_best = fbest ;

#if DEBUG
      printf ( "\nStarting GLOBAL " ) ;
#endif

      user_quit = glob_min ( 0.0 , 2.0 * scale , -3 , 0 , critlim ,
                  univar_crit , &t1 , &y1 , &t2 , &y2 , &t3 , &y3 , progress) ;

#if DEBUG
      printf ( "\nGLOBAL t=%lf  f=%lf", t2 / scale , y2 ) ;
#endif

      if (user_quit  ||  (y2 < critlim)) { // ESCape or good enough already?
         if (y2 < fbest) {                 // If global caused improvement
            for (i=0 ; i<n ; i++)          // Implement that improvement
               x[i] = base[i] + t2 * direc[i] ;
            fbest = y2 ;
            }
         else {                            // Else revert to starting point
            for (i=0 ; i<n ; i++)
               x[i] = base[i] ;
            }
         break ;
         }

/*
   We just used a crude global strategy to find three points that
   bracket the minimum.  Refine using Brent's method.
   If we are possibly near the end, as indicated by the convergence_counter
   being nonzero, then try extra hard.
*/

      if (convergence_counter)
         fbest = brentmin ( 20 , critlim , tol , 1.e-7 ,
                            univar_crit , &t1 , &t2 , &t3 , y2 , progress ) ;
      else 
         fbest = brentmin ( 10 , critlim , 10.0 * tol , 1.e-5 ,
                            univar_crit , &t1 , &t2 , &t3 , y2 , progress ) ;

#if DEBUG
         printf ( "\nBRENT t=%lf  f=%lf", t2 / scale , fbest ) ;
#endif

/*
   We just completed the global and refined search.
   Update the current point to reflect the minimum obtained.
   Then evaluate the error and its derivatives there.  (The line-search
   routines evaluated only the error, not its derivatives.)
   If the user pressed ESCape during the line search, fbest will have been
   returned negative.
*/

      for (i=0 ; i<n ; i++)
         x[i] = base[i] + t2 * direc[i] ;

      if (fbest < 0.0) {              // If user pressed ESCape
         fbest = -fbest ;
         user_quit = 1 ;
         break ;
         }

      improvement = (prev_best - fbest) / prev_best ;

#if DEBUG
      printf ( "\nDIREC improvement = %lf %%",
               100. * improvement ) ;
#endif

#if DEBUG > 1
      printf ( "\a..." ) ;
      getch () ;
#endif

      if (fbest < critlim)     // Do we satisfy user yet?
         break ;

      fval = criter ( x , 1 , direc , deriv2 ) ; // Need derivs now
      for (i=0 ; i<n ; i++)                      // Flip sign to get
         direc[i] = -direc[i] ;                  // negative gradient

      if (fval < 0.0) {                          // If user pressed ESCape
         user_quit = 1 ;
         break ;
         }

      sprintf ( msg , "scale=%lf f=%le dlen=%le improvement=%lf%%",
                t2 / scale , fval, dlen, 100.0 * improvement ) ;
      if (progress)
         write_progress ( msg ) ;
      else 
         write_non_progress ( msg ) ;

#if DEBUG
      printf ( "\nf=%lf at (", fval ) ;
#endif

#if DEBUG > 1
      for (i=0 ; i<n ; i++)
         printf ( " %lf", x[i] ) ;
      printf ( ")...\a" ) ;
      getch () ;
#endif

      gam = gamma ( n , g , direc ) ;

#if DEBUG
      dlen = 0.0 ;
      for (i=0 ; i<n ; i++)
         dlen += direc[i] * direc[i] ;
      printf ( "\nGamma = %lf  with grad len = %lf", gam, sqrt(dlen) ) ;
#endif

      if (gam < 0.0)
         gam = 0.0 ;

      if (gam > 10.0)             // limit gamma
         gam = 10.0 ;

      if (improvement < 0.001)    // Count how many times we
         ++poor_cj_counter ;      // got poor improvement
      else                        // in a row
         poor_cj_counter = 0 ;

      if (poor_cj_counter >= 2) { // If several times
         if (gam > 1.0)           // limit gamma
            gam = 1.0 ;
         }

      if (poor_cj_counter >= 6) { // If too many times
         poor_cj_counter = 0 ;    // set gamma to 0
         gam = 0.0 ;              // to use steepest descent (gradient)
#if DEBUG
         printf ( "\nSetting Gamma=0" ) ;
#endif
         }

      find_new_dir ( n , gam , g , h , direc ) ; // Compute search direction


      } // Main loop

FINISH:
   if (user_quit)
      return -fbest ;
   else 
      return fbest ;
}
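
/*
   The routines univar_crit, gamma, and find_new_dir are referenced above but
   not listed here.  The following are hedged sketches reconstructed from the
   way they are called, not the original source.  univar_crit is assumed to
   evaluate the multivariate criterion along the current search direction,
   parameterized by t, using the file-static pointers that dermin set up,
   with the derivative flag zero and null derivative pointers, and to treat
   a negative function value as the ESCape signal.  gamma is assumed to
   compute the Polak-Ribiere coefficient, and find_new_dir the usual
   conjugate-gradient direction update.
*/

static double *local_x, *local_base, *local_direc ;   // Set by dermin above
static int local_n ;
static double (*local_criter) (double * , int , double * , double * ) ;

static int univar_crit ( double t , double *fval )
{
   int i ;
   double y ;

   for (i=0 ; i<local_n ; i++)               // Step out from the base point
      local_x[i] = local_base[i] + t * local_direc[i] ;

   y = local_criter ( local_x , 0 , NULL , NULL ) ;  // No derivatives needed

   if (y < 0.0) {       // Assumed convention: negative flags ESCape
      *fval = -y ;
      return 1 ;        // Tell glob_min / brentmin to quit
      }

   *fval = y ;
   return 0 ;
}

static double gamma ( int n , double *g , double *grad )
{
   int i ;
   double numer, denom ;

   numer = denom = 0.0 ;
   for (i=0 ; i<n ; i++) {
      denom += g[i] * g[i] ;                 // Previous (negative) gradient
      numer += (grad[i] - g[i]) * grad[i] ;  // Polak-Ribiere numerator
      }

   if (denom == 0.0)    // Should almost never happen
      return 0.0 ;
   return numer / denom ;
}

static void find_new_dir ( int n , double gam ,
                           double *g , double *h , double *grad )
{
   int i ;

   for (i=0 ; i<n ; i++) {
      g[i] = grad[i] ;                       // Save the new (negative) gradient
      grad[i] = h[i] = g[i] + gam * h[i] ;   // New conjugate search direction
      }
}
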
int glob_min (
   double low ,           // Lower limit for search
   double high ,          // Upper limit
   int npts ,             // Number of points to try
   int log_space ,        // Space by log?
   double critlim ,       // Quit global if crit drops this low
   int (*criter) (double , double *) , // Criterion function
   double *x1 ,
   double *y1 ,           // Lower X value and function there
   double *x2 ,
   double *y2 ,           // Middle (best)
   double *x3 ,
   double *y3 ,           // And upper
   int progress           // Print progress?  (unused in this routine)
   )
{
   int i, ibest, turned_up, know_first_point, user_quit ;
   double x, y, rate, previous ;

   user_quit = 0 ;

   if (npts < 0) {
      npts = -npts ;
      know_first_point = 1 ;
      }
   else 
      know_first_point = 0 ;

   if (log_space)
      rate = exp ( log (high / low) / (npts - 1) ) ;
   else 
      rate = (high - low) / (npts - 1) ;

   x = low ;

   previous = 0.0 ; // Avoids "use before set" compiler warnings
   ibest = -1 ;     // For proper critlim escape
   turned_up = 0 ;  // Must know if function increased after min

   for (i=0 ; i<npts ; i++) {

      if (i  ||  ! know_first_point)
         user_quit = criter ( x , &y ) ;
      else
         y = *y2 ;

      if ((i == 0)  ||  (y < *y2)) {  // Keep track of best here
         ibest = i ;
         *x2 = x ;
         *y2 = y ;
         *y1 = previous ;  // Function value to its left
         turned_up = 0 ;   // Flag that min is not yet bounded
         }

      else if (i == (ibest+1)) { // Didn't improve so this point may
         *y3 = y ;               // be the right neighbor of the best
         turned_up = 1 ;         // Flag that min is bounded
         }

      previous = y ;             // Keep track for left neighbor of best

      if (! user_quit)
         user_quit = user_pressed_escape () ;

      if ((user_quit  ||  (*y2 <= critlim))  &&  (ibest > 0)  &&  turned_up)
         break ; // Done if (abort or good enough) and both neighbors found

      if (user_quit)          // Alas, both neighbors not found
         return user_quit ;   // Flag that the other 2 pts not there

      if (log_space)
         x *= rate ;
      else 
         x += rate ;
      }

/*
   At this point we have a minimum (within low,high) at (x2,y2).
   Compute x1 and x3, its neighbors.
   We already know y1 and y3 (unless the minimum is at an endpoint!).
*/

   if (log_space) {
      *x1 = *x2 / rate ;
      *x3 = *x2 * rate ;
      }
   else {
      *x1 = *x2 - rate ;
      *x3 = *x2 + rate ;
      }

/*
   Normally we would now be done.  However, the careless user may have
   given us a bad x range (low,high) for the global search.
   If the function was still decreasing at an endpoint, bail the user out
   by extending the search beyond that endpoint.
*/

   if (! turned_up) { // Must extend to the right (larger x)
      for (;;) {      // Endless loop goes as long as necessary

         user_quit = user_pressed_escape () ;

         if (! user_quit)
            user_quit = criter ( *x3 , y3 ) ;

         if (user_quit)          // Alas, both neighbors not found
            return user_quit ;   // Flag that the other 2 pts not there

         if (*y3 > *y2)  // If function increased we are done
            break ;
         if ((*y1 == *y2)  &&  (*y2 == *y3)) // Give up if flat
            break ;

         *x1 = *x2 ;      // Shift all points
         *y1 = *y2 ;
         *x2 = *x3 ;
         *y2 = *y3 ;

         rate *= 3.0 ;    // Step further each time
         if (log_space)   // And advance to new frontier
            *x3 *= rate ;
         else 
            *x3 += rate ;
         }
      }

   else if (ibest == 0) {  // Must extend to the left (smaller x)
      for (;;) {           // Endless loop goes as long as necessary

         user_quit = user_pressed_escape () ;

         if (! user_quit)
            user_quit = criter ( *x1 , y1 ) ;

         if (user_quit)         // Alas, both neighbors not found
            return user_quit ;  // Flag that the other 2 pts not there

         if (*y1 > *y2)   // If function increased we are done
            break ;
         if ((*y1 == *y2)  &&  (*y2 == *y3)) // Give up if flat
            break ;

         *x3 = *x2 ;      // Shift all points
         *y3 = *y2 ;
         *x2 = *x1 ;
         *y2 = *y1 ;

         rate *= 3.0 ;    // Step further each time
         if (log_space)   // And advance to new frontier
            *x1 /= rate ;
         else 
            *x1 -= rate ;
         }
      }

   return 0 ;
}
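
/*
   A brief, hypothetical usage sketch (not part of the original source):
   bracket the minimum of a simple one-dimensional function with glob_min.
   The demo criterion never aborts, so it returns zero and places its value
   in *y, matching the pointer type glob_min expects.  All names below are
   illustrative only.
*/

#include <stdio.h>

static int demo_crit ( double x , double *y )
{
   *y = (x - 3.0) * (x - 3.0) + 1.0 ;   // Parabola with its minimum at x = 3
   return 0 ;                           // Never signals an abort
}

void demo_glob_min ()
{
   double x1, y1, x2, y2, x3, y3 ;

   // Try 20 evenly spaced points in [0,10]; a huge negative critlim
   // disables the early escape, so all points are examined.
   if (glob_min ( 0.0 , 10.0 , 20 , 0 , -1.e30 , demo_crit ,
                  &x1 , &y1 , &x2 , &y2 , &x3 , &y3 , 0 ) == 0)
      printf ( "Bracket: x1=%lf  x2=%lf  x3=%lf  best f=%lf\n",
               x1, x2, x3, y2 ) ;
}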