void LayerNet::check_grad ( TrainingSet *tptr , float *grad )
{
   int i, j, n ;
   float f0, f1, deriv, dot, len1, len2 ;

   dot = len1 = len2 = 0.0 ;
   f0 = trial_error ( tptr ) ;

   for (i=0 ; i<nhid1 ; i++) {
      for (j=0 ; j<=nin ; j++) {
         hid1_coefs[i*(nin+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         hid1_coefs[i*(nin+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
      }
   }

   for (i=0 ; i<nhid2 ; i++) {
      for (j=0 ; j<=nhid1 ; j++) {
         hid2_coefs[i*(nhid1+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         hid2_coefs[i*(nhid1+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
      }
   }

   if (nhid1 == 0)        // No hidden layer
      n = nin ;
   else if (nhid2 == 0)   // One hidden layer
      n = nhid1 ;
   else                   // Two hidden layers
      n = nhid2 ;

   for (i=0 ; i<nout ; i++) {
      for (j=0 ; j<=n ; j++) {
         out_coefs[i*(n+1)+j] += .001 ;
         f1 = trial_error ( tptr ) ;
         out_coefs[i*(n+1)+j] -= .001 ;
         deriv = 10000.0 * (f0 - f1) ;
         len1 += *grad * *grad ;
         len2 += deriv * deriv ;
         dot += *grad++ * deriv ;
      }
   }
}
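/*
   A note on check_grad: the loops above accumulate the dot product of the
   analytic gradient with a one-sided finite-difference estimate, plus the
   squared length of each vector, but never report them (presumably they were
   inspected in a debugger).  The helper below is a hypothetical sketch, not
   part of the original library, showing how those three accumulators could
   be reduced to a single figure of merit: the cosine of the angle between
   the analytic and numeric gradients, which should be very close to 1.0 when
   the gradient code is correct.  The cosine is unaffected by the constant
   scale factor applied to deriv.  It assumes math.h is already included, as
   it is elsewhere in this file.
*/

// Illustrative only: cosine of the angle between analytic and numeric gradients
static double grad_check_cosine ( double dot , double len1 , double len2 )
{
   double denom = sqrt ( len1 * len2 ) ;  // Product of the two vector lengths
   if (denom <= 0.0)                      // Degenerate: a gradient is all zeros
      return 0.0 ;
   return dot / denom ;                   // Near 1.0 means the gradients agree
}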
void LayerNet::anneal (
   TrainingSet *tptr ,        // Training set to use
   struct LearnParams *lptr , // User's general learning parameters
   LayerNet *bestnet ,        // Work area used to keep best network
   int init                   // Use zero suffix (initialization) anneal parms?
   )
{
   int ntemps, niters, setback, reg, nvars, key, user_quit ;
   int i, iter, improved, ever_improved, itemp ;
   long seed, bestseed ;
   char msg[80] ;
   double tempmult, temp, fval, bestfval, starttemp, stoptemp, fquit ;
   SingularValueDecomp *sptr ;
   struct AnnealParams *aptr ; // User's annealing parameters

   aptr = lptr->ap ;

/*
   The parameter 'init' is nonzero if we are initializing weights for
   learning.  If zero we are attempting to break out of a local minimum.
   The main effect of this parameter is whether or not we use the zero
   suffix variables in the anneal parameters.  A second effect is that
   regression is used only for initialization, not for escape.
*/

   if (init) {
      ntemps = aptr->temps0 ;
      niters = aptr->iters0 ;
      setback = aptr->setback0 ;
      starttemp = aptr->start0 ;
      stoptemp = aptr->stop0 ;
   }
   else {
      ntemps = aptr->temps ;
      niters = aptr->iters ;
      setback = aptr->setback ;
      starttemp = aptr->start ;
      stoptemp = aptr->stop ;
   }

/*
   Initialize other local parameters.  Note that there is no sense using
   regression if there are no hidden layers.  Also, regression is almost
   always counterproductive for local minimum escape.
*/

   fquit = lptr->quit_err ;
   reg = init && nhid1 && (lptr->init != 1) ;

/*
   Allocate the singular value decomposition object for REGRESS.
   Also allocate a work area for REGRESS to preserve matrix.
*/

   if (reg) {
      if (nhid1 == 0)        // No hidden layer
         nvars = nin + 1 ;
      else if (nhid2 == 0)   // One hidden layer
         nvars = nhid1 + 1 ;
      else                   // Two hidden layers
         nvars = nhid2 + 1 ;
      MEMTEXT ( "ANNEAL: new SingularValueDecomp" ) ;
      sptr = new SingularValueDecomp ( tptr->ntrain , nvars , 1 ) ;
      if ((sptr == NULL) || ! sptr->ok) {
         memory_message (
            "for annealing with regression. Try ANNEAL NOREGRESS." ) ;
         if (sptr != NULL)
            delete sptr ;
         neterr = 1.0 ;  // Flag failure to LayerNet::learn which called us
         return ;
      }
   }

/*
   For every temperature, the center around which we will perturb is the
   best point so far.  This is kept in 'bestnet', so initialize it to the
   user's starting estimate.  Also, initialize 'bestfval', the best function
   value so far, to be the function value at that starting point.
*/

   copy_weights ( bestnet , this ) ;  // Current weights are best so far
   if (init)
      bestfval = 1.e30 ;              // Force it to accept SOMETHING
   else
      bestfval = trial_error ( tptr ) ;

/*
   This is the temperature reduction loop and the iteration within
   temperature loop.  We use a slick trick to keep track of the best point
   at a given temperature.  We certainly don't want to replace the best
   every time an improvement is had, as then we would be moving our center
   about, compromising the global nature of the algorithm.  We could, of
   course, have a second work area in which we save the 'best so far for
   this temperature' point.  But if there are a lot of variables, the usual
   case, this wastes memory.  What we do is to save the seed of the random
   number generator which created the improvement.  Then later, when we need
   to retrieve the best, simply set the random seed and regenerate it.  This
   technique also saves a lot of copying time if many improvements are made
   for a single temperature.
*/

   temp = starttemp ;
   tempmult = exp ( log ( stoptemp / starttemp ) / (ntemps-1) ) ;
   ever_improved = 0 ;                       // Flags if improved at all
   user_quit = 0 ;                           // Flags user pressed ESCape

   for (itemp=0 ; itemp<ntemps ; itemp++) {  // Temp reduction loop

      improved = 0 ;                         // Flags if this temp improved

      if (init) {
         sprintf ( msg , "\nANNEAL temp=%.2lf ", temp ) ;
         progress_message ( msg ) ;
      }

      for (iter=0 ; iter<niters ; iter++) {  // Iters per temp loop

         seed = longrand () ;                // Get a random seed
         slongrand ( seed ) ;                // Brute force set it
         perturb ( bestnet , this , temp , reg ) ; // Randomly perturb about best

         if (reg)                            // If using regression, estimate
            fval = regress ( tptr , sptr ) ; // out weights now
         else                                // Otherwise just evaluate
            fval = trial_error ( tptr ) ;

         if (fval < bestfval) {              // If this iteration improved
            bestfval = fval ;                // then update the best so far
            bestseed = seed ;                // and save seed to recreate it
            ever_improved = improved = 1 ;   // Flag that we improved

            if (bestfval <= fquit)           // If we reached the user's
               break ;                       // limit, we can quit

            iter -= setback ;                // It often pays to keep going
            if (iter < 0)                    // at this temperature if we
               iter = 0 ;                    // are still improving
         }
      }  // Loop: for all iters at a temp

      if (improved) {                        // If this temp saw improvement
         slongrand ( bestseed ) ;            // set seed to what caused it
         perturb ( bestnet , this , temp , reg ) ; // and recreate that point
         copy_weights ( bestnet , this ) ;   // which will become next center
         slongrand ( bestseed / 2 + 999 ) ;  // Jog seed away from best

         if (init) {
            sprintf ( msg , " err=%.3lf%% ", 100.0 * bestfval ) ;
            progress_message ( msg ) ;
         }
      }

      if (bestfval <= fquit)  // If we reached the user's
         break ;              // limit, we can quit

/***********************************************************************
      if (kbhit()) {          // Was a key pressed?
         key = getch () ;     // Read it if so
         while (kbhit())      // Flush key buffer in case function key
            getch () ;        // or key was held down
         if (key == 27) {     // ESCape
            user_quit = 1 ;   // Flags user that ESCape was pressed
            break ;
         }
      }
***********************************************************************/

      if (user_quit)
         break ;

      temp *= tempmult ;      // Reduce temp for next pass
   }  // through this temperature loop

/*
   The trials left this weight set and neterr in random condition.
   Make them equal to the best, which will be the original if we never
   improved.  Also, if we improved and are using regression, recall that
   bestnet only contains the best hidden weights, as we did not bother to
   run regress when we updated bestnet.  Do that now before returning.
*/

   copy_weights ( this , bestnet ) ;  // Return best weights in this net
   neterr = bestfval ;                // Trials destroyed weights, err

   if (ever_improved && reg)
      neterr = regress ( tptr , sptr ) ;  // regressed output weights

   if (reg) {
      MEMTEXT ( "ANNEAL: delete SingularValueDecomp" ) ;
      delete sptr ;
   }
}
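/*
   A note on the temperature schedule used above (and again in ssg_core):
   tempmult = exp( log(stoptemp/starttemp) / (ntemps-1) ) produces a
   geometric schedule, so on pass k (k = 0 ... ntemps-1) the temperature is
   starttemp * (stoptemp/starttemp)^(k/(ntemps-1)), and the final pass runs
   at exactly stoptemp.  The helper below is illustrative only, not part of
   the original source; like the code above it assumes ntemps >= 2.
*/

// Illustrative only: temperature at pass k of the geometric annealing schedule
static double anneal_temp_at ( double starttemp , double stoptemp ,
                               int ntemps , int k )
{
   double tempmult = exp ( log ( stoptemp / starttemp ) / (ntemps - 1) ) ;
   double temp = starttemp ;
   while (k--)       // Apply the per-pass reduction k times
      temp *= tempmult ;
   return temp ;     // starttemp * pow( stoptemp/starttemp , k/(ntemps-1) )
}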
int LayerNet::ssg_core (
   TrainingSet *tptr ,        // Training set to use
   struct LearnParams *lptr , // User's general learning parameters
   LayerNet *avgnet ,         // Work area used to keep average weights
   LayerNet *bestnet ,        // And the best so far
   double *work1 ,            // Gradient work vector
   double *work2 ,            // Ditto
   double *grad ,             // Ditto
   double *avg_grad ,         // Ditto
   int n_grad                 // Length of above vectors
   )
{
   int ntemps, niters, setback, reg, nvars, user_quit ;
   int i, iter, itemp, n_good, n_bad, use_grad ;
   char msg[80] ;
   double tempmult, temp, fval, bestfval, starttemp, stoptemp, fquit ;
   double avg_func, new_fac, gradlen, grad_weight, weight_used ;
   enum RandomDensity density ;
   SingularValueDecomp *sptr ;
   struct AnnealParams *aptr ; // User's annealing parameters

   aptr = lptr->ap ;

   ntemps = aptr->temps0 ;
   niters = aptr->iters0 ;
   setback = aptr->setback0 ;
   starttemp = aptr->start0 ;
   stoptemp = aptr->stop0 ;
   if (aptr->random0 == ANNEAL_GAUSSIAN)
      density = NormalDensity ;
   else if (aptr->random0 == ANNEAL_CAUCHY)
      density = CauchyDensity ;

   if (! (ntemps * niters))
      return 0 ;

/*
   Initialize other local parameters.  Note that there is no sense using
   regression if there are no hidden layers.
*/

   use_grad = (grad != NULL) ;
   fquit = lptr->quit_err ;
   reg = nhid1 ;

/*
   Allocate the singular value decomposition object for REGRESS.
   Also allocate a work area for REGRESS to preserve matrix.
*/

   if (reg) {                // False if no hidden layers
      if (nhid2 == 0)        // One hidden layer
         nvars = nhid1_n ;
      else                   // Two hidden layers
         nvars = nhid2_n ;
      i = (model == NETMOD_COMPLEX)  ?  2 * tptr->ntrain  :  tptr->ntrain ;
      if (i < nvars) {
         warning_message ( "Too few training sets for regression." ) ;
         reg = 0 ;
      }
      else {
         MEMTEXT ( "SSG: new SingularValueDecomp" ) ;
         sptr = new SingularValueDecomp ( i , nvars , 1 ) ;
         if ((sptr == NULL) || ! sptr->ok) {
            memory_message (
               "for SS(G) with regression. Using total randomization." ) ;
            if (sptr != NULL)
               delete sptr ;
            reg = 0 ;
         }
      }
   }

/*
   For the basic algorithm, we will keep the current 'average' network
   weight set in avgnet.  This will be the moving center about which the
   perturbation is done.  Although not directly related to the algorithm
   itself, we keep track of the best network ever found in bestnet.  That
   is what the user will get at the end.
*/

   copy_weights ( bestnet , this ) ;  // Current weights are best so far
   copy_weights ( avgnet , this ) ;   // Center of perturbation
   bestfval = trial_error ( tptr ) ;

/*
   If this is being used to initialize the weights, make sure that they are
   not identically zero.  Do this by setting bestfval huge so that
   SOMETHING is accepted later.
*/

   if (nhid1) {
      i = nhid1 * nin_n ;
      while (i--) {
         if (fabs(hid1_coefs[i]) > 1.e-10)
            break ;
      }
      if (i < 0)
         bestfval = 1.e30 ;
   }

/*
   Initialize by cumulating a bunch of points
*/

   normal_message ( "Initializing..." ) ;
   avg_func = 0.0 ;                       // Mean function around center
   if (use_grad) {
      for (i=0 ; i<n_grad ; i++)          // Zero the mean gradient
         avg_grad[i] = 0.0 ;
   }

   for (iter=0 ; iter<niters ; iter++) {  // Initializing iterations

      perturb ( avgnet , this , starttemp , reg , density ) ; // Move point

      if (reg)                            // If using regression, estimate
         fval = regress ( tptr , sptr ) ; // out weights now, ignore fval
      if (use_grad)                       // Also need gradient?
         fval = gradient ( tptr , work1 , work2 , grad ) ; // fval redundant
      else if (! reg)                     // If reg we got fval from regress
         fval = trial_error ( tptr ) ;

      avg_func += fval ;                  // Cumulate mean function

      if (use_grad) {                     // Also need gradient?
         for (i=0 ; i<n_grad ; i++)       // Cumulate mean gradient
            avg_grad[i] += grad[i] ;
      }

      if (fval < bestfval) {              // If this iteration improved
         bestfval = fval ;                // then update the best so far
         copy_weights ( bestnet , this ) ; // Keep the network
         if (bestfval <= fquit)           // If we reached the user's
            goto FINISH ;                 // limit, we can quit
      }

      if ((user_quit = user_pressed_escape ()) != 0)
         goto FINISH ;
   }  // Loop: for all initial iters

   avg_func /= niters ;       // Mean of all points around avgnet
   new_fac = 1.0 / niters ;   // Weight of each point

   sprintf ( msg , " avg=%.6lf best=%.6lf", avg_func, bestfval ) ;
   progress_message ( msg ) ;

   if (use_grad) {                     // Also need gradient?
      gradlen = 0.0 ;                  // Will cumulate grad length
      for (i=0 ; i<n_grad ; i++) {     // Find gradient mean and length
         avg_grad[i] /= niters ;
         gradlen += avg_grad[i] * avg_grad[i] ;
      }
      gradlen = sqrt ( gradlen ) ;
      grad_weight = 0.5 ;
   }

/*
   This is the temperature reduction loop and the iteration within
   temperature loop.
*/

   temp = starttemp ;
   tempmult = exp ( log ( stoptemp / starttemp ) / (ntemps-1) ) ;
   user_quit = 0 ;                           // Flags user pressed ESCape

   for (itemp=0 ; itemp<ntemps ; itemp++) {  // Temp reduction loop

      n_good = n_bad = 0 ;                   // Counts better and worse

      sprintf ( msg , "Temp=%.3lf ", temp ) ;
      normal_message ( msg ) ;

      for (iter=0 ; iter<niters ; iter++) {  // Iters per temp loop

         if ((n_bad >= 10)  &&
             ((double) n_good / (double) (n_good+n_bad)  <  0.15))
            break ;

         perturb ( avgnet , this , temp , reg ,
                   density ) ;               // Randomly perturb about center

         if (use_grad)                       // Bias per gradient?
            weight_used = shift ( grad , this , grad_weight , reg ) ;

         if (reg) {                          // If using regression, estimate
            fval = regress ( tptr , sptr ) ; // out weights now
            if ((user_quit = user_pressed_escape ()) != 0)
               break ;
            if (fval >= avg_func) {          // If this would raise mean
               ++n_bad ;                     // Count this bad point for user
               continue ;                    // Skip it and try again
            }
         }

         if (use_grad)                       // Need gradient, fval redundant
            fval = gradient ( tptr , work1 , work2 , grad ) ;
         else if (! reg)                     // If reg we got fval from regress
            fval = trial_error ( tptr ) ;

         if ((user_quit = user_pressed_escape ()) != 0)
            break ;

         if (fval >= avg_func) {             // If this would raise mean
            ++n_bad ;                        // Count this bad point for user
            continue ;                       // Skip it and try again
         }

         ++n_good ;

         if (fval < bestfval) {              // If this iteration improved
            bestfval = fval ;                // then update the best so far
            copy_weights ( bestnet , this ) ; // Keep the network
            if (bestfval <= fquit)           // If we reached the user's
               break ;                       // limit, we can quit
            iter -= setback ;                // It often pays to keep going
            if (iter < 0)                    // at this temperature if we
               iter = 0 ;                    // are still improving
         }

         adjust ( avgnet , this , reg , new_fac ) ;  // Move center slightly
         avg_func = new_fac * fval  +  (1.0 - new_fac) * avg_func ;
         if (use_grad) {
            grad_weight = new_fac * weight_used  +  (1.0 - new_fac) * grad_weight ;
            for (i=0 ; i<n_grad ; i++)       // Adjust mean gradient
               avg_grad[i] = new_fac * grad[i]  +  (1.0 - new_fac) * avg_grad[i] ;
         }
      }  // Loop: for all iters at a temp

/*
   Iters within temp loop now complete
*/

      sprintf ( msg , " %.3lf%% improved avg=%.5lf best=%.5lf",
                100.0 * n_good / (double) (n_good+n_bad), avg_func, bestfval ) ;
      progress_message ( msg ) ;

      if (use_grad) {
         gradlen = 0.0 ;                     // Will cumulate grad length
         for (i=0 ; i<n_grad ; i++)          // Find gradient length
            gradlen += avg_grad[i] * avg_grad[i] ;
         gradlen = sqrt ( gradlen ) ;
         sprintf ( msg , " grad=%.5lf", gradlen ) ;
         progress_message ( msg ) ;
      }

      if (bestfval <= fquit)  // If we reached the user's
         break ;              // limit, we can quit

      if (user_quit)
         break ;

      temp *= tempmult ;      // Reduce temp for next pass
   }  // through this temperature loop

/*
   The trials left this weight set and neterr in random condition.
   Make them equal to the best, which will be the original if we
   never improved.
*/

FINISH:
   copy_weights ( this , bestnet ) ;  // Return best weights in this net
   neterr = bestfval ;                // Trials destroyed weights, err

   if (reg) {
      MEMTEXT ( "SSG: delete SingularValueDecomp" ) ;
      delete sptr ;
   }

   if (user_quit)
      return 1 ;
   else
      return 0 ;
}
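/*
   A note on the center update in ssg_core: whenever a perturbed point beats
   the running mean, the center (avgnet), the mean function value avg_func,
   the gradient-bias weight, and the mean gradient are all nudged toward the
   new point by the same exponentially-smoothed update, with new_fac set to
   1/niters.  The helper below is illustrative only, not part of the original
   source; it is just the scalar form of that update.
*/

// Illustrative only: exponential smoothing as used for avg_func, grad_weight
// and each avg_grad[i] in ssg_core above (new_fac = 1/niters there).
static double smooth_update ( double old_value , double new_value , double new_fac )
{
   return new_fac * new_value  +  (1.0 - new_fac) * old_value ;
}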
double LayerNet::direcmin (
   TrainingSet *tptr ,   // Training set to use
   double start_err ,    // Error (function value) at starting coefficients
   int itmax ,           // Upper limit on number of iterations allowed
   double eps ,          // Small, but greater than machine precision
   double tol ,          // Brent's tolerance (>= sqrt machine precision)
   double *base ,        // Work area (stepping out point)
   double *direc )       // Work area (stepping out direction)
{
   int key, user_quit, iter ;
   double step, x1, x2, x3, t1, t2, numer, denom, max_step ;
   double xlow, xhigh, xbest, testdist ;
   double current_err, err, previous_err, step_err ;
   double prevdist, etemp, frecent, fthirdbest, fsecbest, fbest ;
   double tol1, tol2, xrecent, xthirdbest, xsecbest, xmid ;
   double first_step = 2.5 ;  // Heuristically found best

   user_quit = 0 ;

/*
   Take one step out in the gradient direction.  First preserve the
   original weights for use as departure point parameterized by STEP.
*/

   preserve ( base ) ;   // Establishes a base for stepping out
   step_out ( first_step , direc , base ) ;
   err = trial_error ( tptr ) ;

/*
   If it increased, we had numerical problems computing the direction or
   the direction itself is too large a step.  Negate the direction and use
   -1, 0 and 1.618 as first three steps.  Otherwise use 0, 1 and 2.618 as
   first three steps.
*/

   if (err > start_err) {
      negate_dir ( direc ) ;
      x1 = -first_step ;
      x2 = 0. ;
      previous_err = err ;
      current_err = start_err ;
   }
   else {
      x1 = 0. ;
      x2 = first_step ;
      previous_err = start_err ;
      current_err = err ;
   }

/*
   At this point we have taken a single step and the function decreased.
   Take one more step in the golden ratio.  Also keep errors lined up as
   'previous_err', 'current_err' and 'err'.  The corresponding abscissae
   will be x1, x2 and x3.
*/

/************************************************************************
   if (kbhit()) {         // Was a key pressed?
      key = getch () ;    // Read it if so
      while (kbhit())     // Flush key buffer in case function key
         getch () ;       // or key was held down
      if (key == 27)      // ESCape
         return (- err) ;
   }
***********************************************************************/

   x3 = x2 + 1.618034 * first_step ;
   step_out ( x3 , direc , base ) ;
   err = trial_error ( tptr ) ;

/*
   We now have three points x1, x2 and x3 with corresponding errors of
   'previous_err', 'current_err' and 'err'.  Endlessly loop until we
   bracket the minimum with the outer two.
*/

   while (err < current_err) {   // As long as we are descending...

/*********************************************************************
      if (kbhit()) {             // Was a key pressed?
         key = getch () ;        // Read it if so
         while (kbhit())         // Flush key buffer in case function key
            getch () ;           // or key was held down
         if (key == 27) {        // ESCape
            user_quit = 1 ;
            break ;
         }
      }
*********************************************************************/

/*
   Try a parabolic fit to estimate the location of the minimum.
*/

      t1 = (x2 - x1) * (current_err - err) ;
      t2 = (x2 - x3) * (current_err - previous_err) ;
      denom = 2. * ( t2 - t1 ) ;
      if (fabs ( denom ) < eps) {
         if (denom > 0.)
            denom = eps ;
         else
            denom = -eps ;
      }
      step = x2 + ((x2 - x1) * t1 - (x2 - x3) * t2) / denom ; // Here if perfect
      max_step = x2 + 200. * (x3 - x2) ;   // Don't jump too far

      if ((x2 - step) * (step - x3)  >  0.) {   // It's between x2 and x3
         step_out ( step , direc , base ) ;
         step_err = trial_error ( tptr ) ;
         if (step_err < err) {    // It worked!  We found min between b and c.
            x1 = x2 ;
            x2 = step ;
            previous_err = current_err ;
            current_err = step_err ;
            goto BOUNDED ;
         }
         else if (step_err > current_err) {  // Slight miscalc.  Min at x2.
            x3 = step ;
            err = step_err ;
            goto BOUNDED ;
         }
         else {                   // Parabolic fit was total waste.  Use default.
            step = x3 + 1.618034 * (x3 - x2) ;
            step_out ( step , direc , base ) ;
            step_err = trial_error ( tptr ) ;
         }
      }

      else if ((x3 - step) * (step - max_step)  >  0.0) { // Between x3 and lim
         step_out ( step , direc , base ) ;
         step_err = trial_error ( tptr ) ;
         if (step_err < err) {    // Decreased, so advance by golden ratio
            x2 = x3 ;
            x3 = step ;
            step = x3 + 1.618034 * (x3 - x2) ;
            current_err = err ;
            err = step_err ;
            step_out ( step , direc , base ) ;
            step_err = trial_error ( tptr ) ;
         }
      }

      else if ((step - max_step) * (max_step - x3)  >=  0.) { // Beyond limit
         step = max_step ;
         step_out ( step , direc , base ) ;
         step_err = trial_error ( tptr ) ;
         if (step_err < err) {    // Decreased, so advance by golden ratio
            x2 = x3 ;
            x3 = step ;
            step = x3 + 1.618034 * (x3 - x2) ;
            current_err = err ;
            err = step_err ;
            step_out ( step , direc , base ) ;
            step_err = trial_error ( tptr ) ;
         }
      }

      else {                      // Wild!  Reject parabolic and use golden ratio.
         step = x3 + 1.618034 * (x3 - x2) ;
         step_out ( step , direc , base ) ;
         step_err = trial_error ( tptr ) ;
      }

/*
   Shift three points and continue endless loop
*/

      x1 = x2 ;
      x2 = x3 ;
      x3 = step ;
      previous_err = current_err ;
      current_err = err ;
      err = step_err ;
   }  // Endless stepping out loop

BOUNDED:

   step_out ( x2 , direc , base ) ;  // Leave coefs at min

   if (x1 > x3) {  // We may have switched direction at start.
      t1 = x1 ;    // Brent's method which follows assumes ordered parameter.
      x1 = x3 ;
      x3 = t1 ;
   }

   if (user_quit) {
      update_dir ( x2 , direc ) ;    // Make it be the actual dist moved
      return -current_err ;
   }

/*
--------------------------------------------------------------------------------

   At this point we have bounded the minimum between x1 and x3.
   Go to the refinement stage.  We use Brent's algorithm.

--------------------------------------------------------------------------------
*/

/*
   Initialize prevdist, the distance moved on the previous step, to 0 so
   that the 'if (fabs ( prevdist ) > tol1)' encountered on the first
   iteration below will fail, forcing a golden section the first time.
   Also initialize step to 0 to avoid a zealous compiler from pointing out
   that it was referenced before being set.
*/

   prevdist = step = 0.0 ;

/*
   We always keep the minimum bracketed between xlow and xhigh.
   xbest has the min function so far (or latest if tie).
   xsecbest and xthirdbest are the second and third best.
*/

   xbest = xsecbest = xthirdbest = x2 ;
   xlow = x1 ;
   xhigh = x3 ;

   fbest = fsecbest = fthirdbest = current_err ;

/*
   Main loop.  For safety we impose a limit on iterations.
*/

   for (iter=0 ; iter<itmax ; iter++) {

      xmid = 0.5 * (xlow + xhigh) ;
      tol1 = tol * (fabs ( xbest ) + eps) ;
      tol2 = 2. * tol1 ;

/***************************************************************************
      if (kbhit()) {          // Was a key pressed?
         key = getch () ;     // Read it if so
         while (kbhit())      // Flush key buffer in case function key
            getch () ;        // or key was held down
         if (key == 27) {     // ESCape
            user_quit = 1 ;
            break ;
         }
      }
**************************************************************************/

/*
   The following convergence test simultaneously makes sure xhigh and xlow
   are close relative to tol2, and that xbest is near the midpoint.
*/

      if (fabs ( xbest - xmid )  <=  (tol2 - 0.5 * (xhigh - xlow)))
         break ;

      if (fabs ( prevdist )  >  tol1) { // If we moved far enough try parabolic fit
         t1 = (xbest - xsecbest) * (fbest - fthirdbest) ;  // Temps for the
         t2 = (xbest - xthirdbest) * (fbest - fsecbest) ;  // parabolic estimate
         numer = (xbest - xthirdbest) * t2  -  (xbest - xsecbest) * t1 ;
         denom = 2. * (t1 - t2) ;   // Estimate will be numer / denom
         testdist = prevdist ;      // Will soon verify interval is shrinking
         prevdist = step ;          // Save for next iteration
         if (denom != 0.0)          // Avoid dividing by zero
            step = numer / denom ;  // This is the parabolic estimate to min
         else
            step = 1.e30 ;          // Assures failure of next test
         if ((fabs ( step ) < fabs ( 0.5 * testdist )) // If shrinking
          && (step + xbest > xlow)                     // and within known bounds
          && (step + xbest < xhigh)) {                 // then we can use the
            xrecent = xbest + step ;                   // parabolic estimate
            if ((xrecent - xlow  <  tol2)  ||          // If we are very close
                (xhigh - xrecent <  tol2)) {           // to known bounds
               if (xbest < xmid)                       // then stabilize
                  step = tol1 ;
               else
                  step = -tol1 ;
            }
         }
         else {  // Parabolic estimate poor, so use golden section
            prevdist = (xbest >= xmid)  ?  xlow - xbest  :  xhigh - xbest ;
            step = .3819660 * prevdist ;
         }
      }
      else {  // prevdist did not exceed tol1: we did not move far enough
              // to justify a parabolic fit.  Use golden section.
         prevdist = (xbest >= xmid)  ?  xlow - xbest  :  xhigh - xbest ;
         step = .3819660 * prevdist ;
      }

      if (fabs (step)  >=  tol1)     // In order to numerically justify
         xrecent = xbest + step ;    // another trial we must move a
      else {                         // decent distance.
         if (step > 0.)
            xrecent = xbest + tol1 ;
         else
            xrecent = xbest - tol1 ;
      }

/*
   At long last we have a trial point 'xrecent'.  Evaluate the function.
*/

      step_out ( xrecent , direc , base ) ;
      frecent = trial_error ( tptr ) ;

      if (frecent <= fbest) {     // If we improved...
         if (xrecent >= xbest)    // Shrink the (xlow,xhigh) interval by
            xlow = xbest ;        // replacing the appropriate endpoint
         else
            xhigh = xbest ;
         xthirdbest = xsecbest ;  // Update x and f values for best,
         xsecbest = xbest ;       // second and third best
         xbest = xrecent ;
         fthirdbest = fsecbest ;
         fsecbest = fbest ;
         fbest = frecent ;
      }

      else {                      // We did not improve
         if (xrecent < xbest)     // Shrink the (xlow,xhigh) interval by
            xlow = xrecent ;      // replacing the appropriate endpoint
         else
            xhigh = xrecent ;

         if ((frecent <= fsecbest)      // If we at least beat the second best
          || (xsecbest == xbest)) {     // or we had a duplication
            xthirdbest = xsecbest ;     // we can update the second and third
            xsecbest = xrecent ;        // best, though not the best.
            fthirdbest = fsecbest ;     // Recall that we started iters with
            fsecbest = frecent ;        // best, sec and third all equal.
         }
         else if ((frecent <= fthirdbest)  // Oh well.  Maybe at least we can
          || (xthirdbest == xbest)         // beat the third best or rid
          || (xthirdbest == xsecbest)) {   // ourselves of a duplication
            xthirdbest = xrecent ;         // (which is how we start the
            fthirdbest = frecent ;         // iterations)
         }
      }
   }

   step_out ( xbest , direc , base ) ;  // Leave coefs at min
   update_dir ( xbest , direc ) ;       // Make it be the actual distance moved

   if (user_quit)
      return -fbest ;
   else
      return fbest ;
}
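/*
   A note on the constants hard-coded in direcmin: 1.618034 used in the
   stepping-out (bracketing) phase is the golden ratio (1+sqrt(5))/2, and
   .3819660 used in Brent's golden-section fallback is its complement
   2 - (1+sqrt(5))/2 = (3-sqrt(5))/2.  The fragment below is illustrative
   only, not part of the original source; it simply reproduces both values
   from their definitions.
*/

// Illustrative only: the golden-ratio constants appearing in direcmin
static void golden_constants ( double *ratio , double *section )
{
   *ratio   = (1.0 + sqrt ( 5.0 )) / 2.0 ;  // 1.618034... used when stepping out
   *section = 2.0 - *ratio ;                // 0.381966... used in Brent refinement
}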