Example #1
#include <stdio.h>
#include <math.h>
#include "minlbfgs.h"    // ALGLIB L-BFGS unit; the exact header name may differ between ALGLIB releases

int main(int argc, char **argv)
{
    int n;
    int m;
    minlbfgsstate state;
    minlbfgsreport rep;
    ap::real_1d_array s;
    double x;
    double y;

    
    //
    // Function minimized:
    //     F = exp(x-1) + exp(1-x) + (y-x)^2
    // N = 2 - task dimension
    // M = 1 - build rank-1 model
    //
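    //
    // Analytic gradient used in the loop below:
    //     dF/dx = exp(x-1) - exp(1-x) + 2*(x-y)
    //     dF/dy = 2*(y-x)
    // Both derivatives vanish only at x = y = 1, which is the answer checked at the end.
    //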
    n = 2;
    m = 1;
    s.setlength(2);
    s(0) = 10;
    s(1) = ap::randomreal()-0.5;
    minlbfgscreate(n, m, s, state);
    minlbfgssetcond(state, 0.0, 0.0, 0.0001, 0);
    minlbfgssetxrep(state, true);
    printf("\n\nF = exp(x-1) + exp(1-x) + (y-x)^2\n");
    printf("OPTIMIZATION STARTED\n");
    while(minlbfgsiteration(state))
    {
        if( state.needfg )
        {
            x = state.x(0);
            y = state.x(1);
            state.f = exp(x-1)+exp(1-x)+ap::sqr(y-x);
            state.g(0) = exp(x-1)-exp(1-x)+2*(x-y);
            state.g(1) = 2*(y-x);
        }
        if( state.xupdated )
        {
            printf("    F(%8.5lf,%8.5lf)=%0.5lf\n",
                double(state.x(0)),
                double(state.x(1)),
                double(state.f));
        }
    }
    printf("OPTIMIZATION STOPPED\n");
    minlbfgsresults(state, s, rep);
    
    //
    // output results
    //
    printf("X = %4.2lf (should be 1.00)\n",
        double(s(0)));
    printf("Y = %4.2lf (should be 1.00)\n\n\n",
        double(s(1)));
    return 0;
}
Example #2
/*************************************************************************
Neural network training using early stopping (base algorithm - L-BFGS with
regularization).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    TrnXY       -   training set
    TrnSize     -   training set size
    ValXY       -   validation set
    ValSize     -   validation set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1, ...).
                    *  2, task has been solved, stopping  criterion  met -
                          sufficiently small step size.  Not expected  (we
                          use  EARLY  stopping)  but  possible  and not an
                          error.
                    *  6, task has been solved, stopping  criterion  met -
                          increasing of validation set error.
    Rep         -   training report

NOTE:

Algorithm stops either when the validation set error has been increasing for
long enough, or when the step size becomes small enough (there are tasks where
the validation set error may keep decreasing forever). In either case the
returned solution corresponds to the minimum of the validation set error.

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptraines(multilayerperceptron& network,
     const ap::real_2d_array& trnxy,
     int trnsize,
     const ap::real_2d_array& valxy,
     int valsize,
     double decay,
     int restarts,
     int& info,
     mlpreport& rep)
{
    int i;
    int j;
    int pass;
    int nin;
    int nout;
    int wcount;
    ap::real_1d_array w;
    ap::real_1d_array wbest;
    double e;
    double v;
    double ebest;
    ap::real_1d_array wfinal;
    double efinal;
    int itbest;
    lbfgsreport internalrep;
    lbfgsstate state;
    double wstep;

    wstep = 0.001;
    
    //
    // Test inputs, parse flags, read network geometry
    //
    if( trnsize<=0||valsize<=0||restarts<1||ap::fp_less(decay,0) )
    {
        info = -1;
        return;
    }
    mlpproperties(network, nin, nout, wcount);
    if( mlpissoftmax(network) )
    {
        for(i = 0; i <= trnsize-1; i++)
        {
            if( ap::round(trnxy(i,nin))<0||ap::round(trnxy(i,nin))>=nout )
            {
                info = -2;
                return;
            }
        }
        for(i = 0; i <= valsize-1; i++)
        {
            if( ap::round(valxy(i,nin))<0||ap::round(valxy(i,nin))>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    info = 2;
    
    //
    // Prepare
    //
    mlpinitpreprocessor(network, trnxy, trnsize);
    w.setbounds(0, wcount-1);
    wbest.setbounds(0, wcount-1);
    wfinal.setbounds(0, wcount-1);
    efinal = ap::maxrealnumber;
    for(i = 0; i <= wcount-1; i++)
    {
        wfinal(i) = 0;
    }
    
    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for(pass = 1; pass <= restarts; pass++)
    {
        
        //
        // Process
        //
        mlprandomize(network);
        ebest = mlperror(network, valxy, valsize);
        ap::vmove(&wbest(0), &network.weights(0), ap::vlen(0,wcount-1));
        itbest = 0;
        ap::vmove(&w(0), &network.weights(0), ap::vlen(0,wcount-1));
        minlbfgs(wcount, ap::minint(wcount, 50), w, 0.0, 0.0, wstep, 0, 0, state);
        while(minlbfgsiteration(state))
        {
            
            //
            // Calculate gradient
            //
            ap::vmove(&network.weights(0), &state.x(0), ap::vlen(0,wcount-1));
            mlpgradnbatch(network, trnxy, trnsize, state.f, state.g);
            v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
            state.f = state.f+0.5*decay*v;
            ap::vadd(&state.g(0), &network.weights(0), ap::vlen(0,wcount-1), decay);
            rep.ngrad = rep.ngrad+1;
            
            //
            // Validation set
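            // (early stopping logic: remember the best validation error seen so
            // far and the iteration at which it occurred; terminate once the
            // optimizer has run noticeably past that point - see the
            // 30-iteration / 1.5*ItBest test below)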
            //
            if( state.xupdated )
            {
                ap::vmove(&network.weights(0), &w(0), ap::vlen(0,wcount-1));
                e = mlperror(network, valxy, valsize);
                if( ap::fp_less(e,ebest) )
                {
                    ebest = e;
                    ap::vmove(&wbest(0), &network.weights(0), ap::vlen(0,wcount-1));
                    itbest = internalrep.iterationscount;
                }
                if( internalrep.iterationscount>30&&ap::fp_greater(internalrep.iterationscount,1.5*itbest) )
                {
                    info = 6;
                    break;
                }
            }
        }
        minlbfgsresults(state, w, internalrep);
        
        //
        // Compare with final answer
        //
        if( ap::fp_less(ebest,efinal) )
        {
            ap::vmove(&wfinal(0), &wbest(0), ap::vlen(0,wcount-1));
            efinal = ebest;
        }
    }
    
    //
    // The best network
    //
    ap::vmove(&network.weights(0), &wfinal(0), ap::vlen(0,wcount-1));
}
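Usage note (not part of the library sources): a minimal calling sketch for mlptraines.
It assumes the ALGLIB helper mlpcreate1 (network construction) and the header name shown
below are available in this ALGLIB version, and it fabricates a tiny synthetic regression
problem purely for illustration.

#include <stdio.h>
#include <math.h>
#include "mlptrain.h"    // assumed ALGLIB unit header; the exact name may differ by release

int main(int argc, char **argv)
{
    multilayerperceptron net;
    mlpreport rep;
    ap::real_2d_array trnxy;
    ap::real_2d_array valxy;
    int trnsize = 100;
    int valsize = 50;
    int info;
    int i;
    double x;

    // synthetic 1D regression data: columns are [input, target], target = sin(input)
    trnxy.setbounds(0, trnsize-1, 0, 1);
    for(i = 0; i <= trnsize-1; i++)
    {
        x = 4*ap::randomreal()-2;
        trnxy(i,0) = x;
        trnxy(i,1) = sin(x);
    }
    valxy.setbounds(0, valsize-1, 0, 1);
    for(i = 0; i <= valsize-1; i++)
    {
        x = 4*ap::randomreal()-2;
        valxy(i,0) = x;
        valxy(i,1) = sin(x);
    }

    // 1 input, 5 hidden neurons, 1 output; Decay=0.001 and Restarts=2 as recommended above
    mlpcreate1(1, 5, 1, net);
    mlptraines(net, trnxy, trnsize, valxy, valsize, 0.001, 2, info, rep);
    printf("info = %d (2 or 6 indicates success)\n", info);
    return 0;
}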
Example #3
/*************************************************************************
Neural network training  using  modified  Levenberg-Marquardt  with  exact
Hessian calculation and regularization. Subroutine trains  neural  network
with restarts from random positions. Algorithm is well  suited  for  small
and medium scale problems (hundreds of weights).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -9, if internal matrix inverse subroutine failed
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptrainlm(multilayerperceptron& network,
     const ap::real_2d_array& xy,
     int npoints,
     double decay,
     int restarts,
     int& info,
     mlpreport& rep)
{
    int nin;
    int nout;
    int wcount;
    double lmftol;
    double lmsteptol;
    int i;
    int j;
    int k;
    int mx;
    double v;
    double e;
    double enew;
    double xnorm2;
    double stepnorm;
    ap::real_1d_array g;
    ap::real_1d_array d;
    ap::real_2d_array h;
    ap::real_2d_array hmod;
    ap::real_2d_array z;
    bool spd;
    double nu;
    double lambda;
    double lambdaup;
    double lambdadown;
    int cvcnt;
    double cvrelcnt;
    lbfgsreport internalrep;
    lbfgsstate state;
    ap::real_1d_array x;
    ap::real_1d_array y;
    ap::real_1d_array wbase;
    double wstep;
    ap::real_1d_array wdir;
    ap::real_1d_array wt;
    ap::real_1d_array wx;
    int pass;
    ap::real_1d_array wbest;
    double ebest;

    mlpproperties(network, nin, nout, wcount);
    lambdaup = 10;
    lambdadown = 0.3;
    lmftol = 0.001;
    lmsteptol = 0.001;
    
    //
    // Test for inputs
    //
    if( npoints<=0||restarts<1 )
    {
        info = -1;
        return;
    }
    if( mlpissoftmax(network) )
    {
        for(i = 0; i <= npoints-1; i++)
        {
            if( ap::round(xy(i,nin))<0||ap::round(xy(i,nin))>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = ap::maxreal(decay, mindecay);
    info = 2;
    
    //
    // Initialize data
    //
    rep.ngrad = 0;
    rep.nhess = 0;
    rep.ncholesky = 0;
    
    //
    // General case.
    // Prepare task and network. Allocate space.
    //
    mlpinitpreprocessor(network, xy, npoints);
    g.setbounds(0, wcount-1);
    h.setbounds(0, wcount-1, 0, wcount-1);
    hmod.setbounds(0, wcount-1, 0, wcount-1);
    wbase.setbounds(0, wcount-1);
    wdir.setbounds(0, wcount-1);
    wbest.setbounds(0, wcount-1);
    wt.setbounds(0, wcount-1);
    wx.setbounds(0, wcount-1);
    ebest = ap::maxrealnumber;
    
    //
    // Multiple passes
    //
    for(pass = 1; pass <= restarts; pass++)
    {
        
        //
        // Initialize weights
        //
        mlprandomize(network);
        
        //
        // First stage of the hybrid algorithm: LBFGS
        //
        ap::vmove(&wbase(0), &network.weights(0), ap::vlen(0,wcount-1));
        minlbfgs(wcount, ap::minint(wcount, 5), wbase, 0.0, 0.0, 0.0, ap::maxint(25, wcount), 0, state);
        while(minlbfgsiteration(state))
        {
            
            //
            // gradient
            //
            ap::vmove(&network.weights(0), &state.x(0), ap::vlen(0,wcount-1));
            mlpgradbatch(network, xy, npoints, state.f, state.g);
            
            //
            // weight decay
            //
            v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
            state.f = state.f+0.5*decay*v;
            ap::vadd(&state.g(0), &network.weights(0), ap::vlen(0,wcount-1), decay);
            
            //
            // next iteration
            //
            rep.ngrad = rep.ngrad+1;
        }
        minlbfgsresults(state, wbase, internalrep);
        ap::vmove(&network.weights(0), &wbase(0), ap::vlen(0,wcount-1));
        
        //
        // Second stage of the hybrid algorithm: LM
        //
        // Initialize H with identity matrix,
        // G with gradient,
        // E with regularized error.
        //
        mlphessianbatch(network, xy, npoints, e, g, h);
        v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
        e = e+0.5*decay*v;
        ap::vadd(&g(0), &network.weights(0), ap::vlen(0,wcount-1), decay);
        for(k = 0; k <= wcount-1; k++)
        {
            h(k,k) = h(k,k)+decay;
        }
        rep.nhess = rep.nhess+1;
        lambda = 0.001;
        nu = 2;
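        //
        // Damping schedule used below (note): every rejected step multiplies
        // Lambda by LambdaUp*Nu and doubles Nu; every accepted step multiplies
        // Lambda by LambdaDown and resets Nu to 2.
        //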
        while(true)
        {
            
            //
            // 1. HMod = H+lambda*I
            // 2. Try to solve (H+Lambda*I)*dx = -g.
            //    Increase lambda if the left-hand side matrix is not positive definite.
            //
            for(i = 0; i <= wcount-1; i++)
            {
                ap::vmove(&hmod(i, 0), &h(i, 0), ap::vlen(0,wcount-1));
                hmod(i,i) = hmod(i,i)+lambda;
            }
            spd = spdmatrixcholesky(hmod, wcount, true);
            rep.ncholesky = rep.ncholesky+1;
            if( !spd )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            if( !spdmatrixcholeskysolve(hmod, g, wcount, true, wdir) )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            ap::vmul(&wdir(0), ap::vlen(0,wcount-1), -1);
            
            //
            // Lambda found.
            // 1. Save old w in WBase
            // 2. Test some stopping criteria
            // 3. If error(w+wdir)>error(w), increase lambda
            //
            ap::vadd(&network.weights(0), &wdir(0), ap::vlen(0,wcount-1));
            xnorm2 = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
            stepnorm = ap::vdotproduct(&wdir(0), &wdir(0), ap::vlen(0,wcount-1));
            stepnorm = sqrt(stepnorm);
            enew = mlperror(network, xy, npoints)+0.5*decay*xnorm2;
            if( ap::fp_less(stepnorm,lmsteptol*(1+sqrt(xnorm2))) )
            {
                break;
            }
            if( ap::fp_greater(enew,e) )
            {
                lambda = lambda*lambdaup*nu;
                nu = nu*2;
                continue;
            }
            
            //
            // Optimize using inv(cholesky(H)) as preconditioner
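            // (note: with the Cholesky factorization HMod = R'*R, the inner L-BFGS
            // loop below reparametrizes the weights as W = WBase + inv(R)*T and
            // minimizes over T, so the quadratic model's Hessian becomes roughly the
            // identity in the new variables; gradients are mapped back through inv(R)')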
            //
            if( !rmatrixtrinverse(hmod, wcount, true, false) )
            {
                
                //
                // if the matrix can't be inverted, exit with an error
                // TODO: make WCount steps in direction suggested by HMod
                //
                info = -9;
                return;
            }
            ap::vmove(&wbase(0), &network.weights(0), ap::vlen(0,wcount-1));
            for(i = 0; i <= wcount-1; i++)
            {
                wt(i) = 0;
            }
            minlbfgs(wcount, wcount, wt, 0.0, 0.0, 0.0, 5, 0, state);
            while(minlbfgsiteration(state))
            {
                
                //
                // gradient
                //
                for(i = 0; i <= wcount-1; i++)
                {
                    v = ap::vdotproduct(&state.x(i), &hmod(i, i), ap::vlen(i,wcount-1));
                    network.weights(i) = wbase(i)+v;
                }
                mlpgradbatch(network, xy, npoints, state.f, g);
                for(i = 0; i <= wcount-1; i++)
                {
                    state.g(i) = 0;
                }
                for(i = 0; i <= wcount-1; i++)
                {
                    v = g(i);
                    ap::vadd(&state.g(i), &hmod(i, i), ap::vlen(i,wcount-1), v);
                }
                
                //
                // weight decay
                // for w = x0+A*t:  grad_t( 0.5*w'*w ) = A'*(x0+A*t)
                //
                v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
                state.f = state.f+0.5*decay*v;
                for(i = 0; i <= wcount-1; i++)
                {
                    v = decay*network.weights(i);
                    ap::vadd(&state.g(i), &hmod(i, i), ap::vlen(i,wcount-1), v);
                }
                
                //
                // next iteration
                //
                rep.ngrad = rep.ngrad+1;
            }
            minlbfgsresults(state, wt, internalrep);
            
            //
            // Accept new position.
            // Calculate Hessian
            //
            for(i = 0; i <= wcount-1; i++)
            {
                v = ap::vdotproduct(&wt(i), &hmod(i, i), ap::vlen(i,wcount-1));
                network.weights(i) = wbase(i)+v;
            }
            mlphessianbatch(network, xy, npoints, e, g, h);
            v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
            e = e+0.5*decay*v;
            ap::vadd(&g(0), &network.weights(0), ap::vlen(0,wcount-1), decay);
            for(k = 0; k <= wcount-1; k++)
            {
                h(k,k) = h(k,k)+decay;
            }
            rep.nhess = rep.nhess+1;
            
            //
            // Update lambda
            //
            lambda = lambda*lambdadown;
            nu = 2;
        }
        
        //
        // update WBest
        //
        v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
        e = 0.5*decay*v+mlperror(network, xy, npoints);
        if( ap::fp_less(e,ebest) )
        {
            ebest = e;
            ap::vmove(&wbest(0), &network.weights(0), ap::vlen(0,wcount-1));
        }
    }
    
    //
    // copy WBest to output
    //
    ap::vmove(&network.weights(0), &wbest(0), ap::vlen(0,wcount-1));
}
Example #4
/*************************************************************************
Neural  network  training  using  L-BFGS  algorithm  with  regularization.
Subroutine  trains  neural  network  with  restarts from random positions.
Algorithm is well suited for problems of any dimensionality (memory
requirements and step complexity are linear in the number of weights).

INPUT PARAMETERS:
    Network     -   neural network with initialized geometry
    XY          -   training set
    NPoints     -   training set size
    Decay       -   weight decay constant, >=0.001
                    Decay term 'Decay*||Weights||^2' is added to error
                    function.
                    If you don't know what Decay to choose, use 0.001.
    Restarts    -   number of restarts from random position, >0.
                    If you don't know what Restarts to choose, use 2.
    WStep       -   stopping criterion. Algorithm stops if  step  size  is
                    less than WStep. Recommended value - 0.01.  Zero  step
                    size means stopping after MaxIts iterations.
    MaxIts      -   stopping   criterion.  Algorithm  stops  after  MaxIts
                    iterations (NOT gradient  calculations).  Zero  MaxIts
                    means stopping when step is sufficiently small.

OUTPUT PARAMETERS:
    Network     -   trained neural network.
    Info        -   return code:
                    * -8, if both WStep=0 and MaxIts=0
                    * -2, if there is a point with class number
                          outside of [0..NOut-1].
                    * -1, if wrong parameters specified
                          (NPoints<0, Restarts<1).
                    *  2, if task has been solved.
    Rep         -   training report

  -- ALGLIB --
     Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlptrainlbfgs(multilayerperceptron& network,
     const ap::real_2d_array& xy,
     int npoints,
     double decay,
     int restarts,
     double wstep,
     int maxits,
     int& info,
     mlpreport& rep)
{
    int i;
    int j;
    int pass;
    int nin;
    int nout;
    int wcount;
    ap::real_1d_array w;
    ap::real_1d_array wbest;
    double e;
    double v;
    double ebest;
    lbfgsreport internalrep;
    lbfgsstate state;

    
    //
    // Test inputs, parse flags, read network geometry
    //
    if( ap::fp_eq(wstep,0)&&maxits==0 )
    {
        info = -8;
        return;
    }
    if( npoints<=0||restarts<1||ap::fp_less(wstep,0)||maxits<0 )
    {
        info = -1;
        return;
    }
    mlpproperties(network, nin, nout, wcount);
    if( mlpissoftmax(network) )
    {
        for(i = 0; i <= npoints-1; i++)
        {
            if( ap::round(xy(i,nin))<0||ap::round(xy(i,nin))>=nout )
            {
                info = -2;
                return;
            }
        }
    }
    decay = ap::maxreal(decay, mindecay);
    info = 2;
    
    //
    // Prepare
    //
    mlpinitpreprocessor(network, xy, npoints);
    w.setbounds(0, wcount-1);
    wbest.setbounds(0, wcount-1);
    ebest = ap::maxrealnumber;
    
    //
    // Multiple starts
    //
    rep.ncholesky = 0;
    rep.nhess = 0;
    rep.ngrad = 0;
    for(pass = 1; pass <= restarts; pass++)
    {
        
        //
        // Process
        //
        mlprandomize(network);
        ap::vmove(&w(0), &network.weights(0), ap::vlen(0,wcount-1));
        minlbfgs(wcount, ap::minint(wcount, 50), w, 0.0, 0.0, wstep, maxits, 0, state);
        while(minlbfgsiteration(state))
        {
            ap::vmove(&network.weights(0), &state.x(0), ap::vlen(0,wcount-1));
            mlpgradnbatch(network, xy, npoints, state.f, state.g);
            v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
            state.f = state.f+0.5*decay*v;
            ap::vadd(&state.g(0), &network.weights(0), ap::vlen(0,wcount-1), decay);
            rep.ngrad = rep.ngrad+1;
        }
        minlbfgsresults(state, w, internalrep);
        ap::vmove(&network.weights(0), &w(0), ap::vlen(0,wcount-1));
        
        //
        // Compare with best
        //
        v = ap::vdotproduct(&network.weights(0), &network.weights(0), ap::vlen(0,wcount-1));
        e = mlperrorn(network, xy, npoints)+0.5*decay*v;
        if( ap::fp_less(e,ebest) )
        {
            ap::vmove(&wbest(0), &network.weights(0), ap::vlen(0,wcount-1));
            ebest = e;
        }
    }
    
    //
    // The best network
    //
    ap::vmove(&network.weights(0), &wbest(0), ap::vlen(0,wcount-1));
}
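Usage note (not part of the library sources): a minimal calling sketch for mlptrainlbfgs,
under the same assumptions as the sketch after Example #2. The caller is expected to
supply an initialized network (e.g. via mlpcreate1) plus the training matrix xy and
npoints; WStep=0.01 with MaxIts=0 stops on a sufficiently small step, as recommended above.

static void train_with_lbfgs(multilayerperceptron& net,
     const ap::real_2d_array& xy,
     int npoints)
{
    mlpreport rep;
    int info;

    // Decay=0.001, Restarts=2, WStep=0.01, MaxIts=0 (stop when the step becomes small)
    mlptrainlbfgs(net, xy, npoints, 0.001, 2, 0.01, 0, info, rep);
    if( info<0 )
    {
        printf("training failed, info = %d\n", info);
    }
}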