/* * line search computes the eta scalar factor at which the error * is minimized. It begins at the actual weight and follows the given * direction. */ bool MLP::lineSearch(const dmatrix& inputs, const ivector& ids, const dvector& direction, double& xmin, dvector& newWeights) const { // following algorithms are based on Press, W.H. et.al. Numerical // Recipies in C. Chapter 10, Minimization or Maximization of // Functions. pp. 397ff static const double gold = 1.618034; // golden mean static const double glimit = 100.0; // maximum magnification // allowed for parabolic-fit // step static const double tiny = 1.0e-20; static const int itmax = 100; // maximum allowed number of // iterations to find minimum static const double cgold = 0.3819660; // golden ration static const double zeps = 1.0e-10; // small number that protects // against trying to achieve // fractional accuracy for a // minimum that happens to be // exaclty 0 const parameters& param = getParameters(); const int layers = param.hiddenUnits.size()+1; std::vector<dmatrix> mWeights(layers); newWeights.copy(weights); updateWeightIndices(newWeights,mWeights); // ----------------------------------------------------------------------- // Initial Bracket // ----------------------------------------------------------------------- double ax(0.0),bx(1.0),cx; double fa,fb,fc; double ulim,u,r,q,fu,dum; // evaluate error at eta=0 and eta=1 computeTotalError(mWeights,inputs,ids,fa); newWeights.add(direction); computeTotalError(mWeights,inputs,ids,fb); if (fb > fa) { // switch roles of a and b so that we can go // downhill in the direction from a to b ax=1.0; bx=0.0; dum=fa; fa=fb; fb=dum; } // first guess for c: cx = bx + gold*(bx-ax); newWeights.addScaled(weights,cx,direction); computeTotalError(mWeights,inputs,ids,fc); while (fb>fc) { // keep returning here until we bracket r=(bx-ax)*(fb-fc); // Compute u by parabolic extrapolation from a,b,c. q=(bx-cx)*(fb-fa); if (q>r) { // tiny is used to prevent any posible division by 0 u=bx-((bx-cx)*q-(bx-ax)*r)/(2.0*(max(q-r,tiny))); } else { u=bx-((bx-cx)*q-(bx-ax)*r)/(-2.0*(max(r-q,tiny))); } ulim=bx+glimit*(cx-bx); // We won't go farther than this. Test various possibilities: if ((bx-u)*(u-cx) > 0.0) { newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); if (fu < fc) { // Got a minimum between b and c ax=bx; bx=u; fa=fb; fb=fu; break; } else if (fu > fb) { // Got a minimum between a and u cx=u; fc=fu; break; } u=cx+gold*(cx-bx); // Parabolic fit was no use. Use default // magnification newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); } else if ((cx-u)*(u-ulim) > 0.0) { // Parabolic fit is between c and its allowed limit. newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); if (fu < fc) { bx=cx; cx=u; u=cx+gold*(cx-bx); fb=fc; fc=fu; newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); } } else if ((u-ulim)*(ulim-cx) >= 0.0) { // Limit parabolic u to // maximum allowed value u=ulim; newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); } else { // Reject parabolic u, use default magnification u=cx+gold*(cx-bx); newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); } // Eliminate oldest point and continue ax=bx; bx=cx; cx=u; fa=fb; fb=fc; fc=fu; } // ----------------------------------------------------------------------- // Line search: Brent's method // ----------------------------------------------------------------------- // fractional precision of found minimum static const double tol = 2.0*sqrt(std::numeric_limits<double>::epsilon()); double fv,fw,fx,etemp; double p,tol1,tol2,v,w,x,xm,a,b,d=0.0; int iter; // This will be the distance moved on the step before last double e=0.0; // a and b must be in ascending order, but input abscissas need not be. if (ax < cx) { a = ax; b = cx; } else { a = cx; b = ax; } // Initializations... x=w=v=bx; fw=fv=fx=fb; for (iter=1;iter<=itmax;++iter) { // main loop xm=0.5*(a+b); tol2=2.0*(tol1=tol*abs(x)+zeps); // test for done here if (abs(x-xm) <= (tol2-0.5*(b-a))) { xmin = x; newWeights.addScaled(weights,xmin,direction); return true; } if (abs(e) > tol1) { r=(x-w)*(fx-fv); q=(x-v)*(fx-fw); p=(x+v)*q - (x-w)*r; q=2.0*(q-r); if (q > 0.0) p = -p; q=abs(q); etemp=e; e=d; if (abs(p) >= abs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) { d = cgold*(e = (x >= xm ? a-x : b-x)); } // The above conditions determine the acceptability of the // parabolic fit. Here we take the golden section step into // the larger of the two segments else { d=p/q; u=x+d; if (u-a < tol2 || b-u < tol2) { d=(xm>=x)?abs(tol1):-abs(tol1); } } } else { d = cgold*(e=(x>=xm ? a-x : b-x)); } u=(abs(d) >= tol1 ? x+d : x+(d>0.0?abs(tol1):-abs(tol1))); // This is the one function evaluation per iteration newWeights.addScaled(weights,u,direction); computeTotalError(mWeights,inputs,ids,fu); // Now decide what to do with our function evaluation if (fu < fx) { if (u>=x) a=x; else b=x; // Housekeeping follows: v=w; w=x; x=u; fv=fw; fw=fx; fx=fu; } else { if (u<x) a=u; else b=u; if (fu <= fw || w == x) { v=w; w=u; fv=fw; fw=fu; } else if (fu <= fv || v == x || v == w) { v=u; fv=fu; } } // done with housekeeping. Back for another iteration } // end of main loop. setStatusString("Too many iterations in brent line search"); xmin=x; newWeights.addScaled(weights,xmin,direction); return false; }