Exemplos de dmax1 em C++ (Cpp)

Exemplo n.º 1

0

Exibir arquivo

void zsvdc(dcomplex *x, int ldx, int n, int p, dcomplex *s, dcomplex *e,
           dcomplex *u, int ldu, dcomplex *v, int ldv, dcomplex *work, 
           int job, int *info)
{
/*
      complex*16 x(ldx,1),s(1),e(1),u(ldu,1),v(ldv,1),work(1)
*/
    int i,iter,j,jobu,k,kase,kk,l,ll,lls,lm1,lp1,ls,lu,m,maxit,
             mm,mm1,mp1,nct,nctp1,ncu,nrt,nrtp1;
    dcomplex t,r;
    double  b,c,cs,el,emm1,f,g,scale,shift,sl,sm,sn,
                      smm1,t1,test,ztest;
    int wantu,wantv;

    /*
     * adjust arrays
     */
    s--;
    e--;
    work--;
    
    /*
     *     set the maximum number of iterations.
     */
    maxit = 30;
    /*
     *     determine what is to be computed.
     */
    wantu = FALSE;
    wantv = FALSE;
    jobu = (job % 100)/10;
    ncu = n;
    if (jobu > 1) 
        ncu = min(n,p);
    if (jobu != 0) 
        wantu = TRUE;
    if ((job % 10) != 0) 
        wantv = TRUE;
    /*
     *     reduce x to bidiagonal form, storing the diagonal elements
     *     in s and the super-diagonal elements in e.
     */
    *info = 0;
    nct = min(n-1,p);
    nrt = min(p-2,n);
    nrt = max(0,nrt);
    lu  = max(nct,nrt);
    if (lu >= 1) { 
        for (l = 1;l <= lu;l++) {
            lp1 = l + 1;
            if (l <= nct) { 
                /*
                 *   compute the transformation for the l-th column and
                 *   place the l-th diagonal in s(l).
                 */
                s[l] = Complex_d(dznrm2(n-l+1,&(X(l,l)),1), 0.0);
                if (cabs1(s[l]) != 0.0) { 
                   if (cabs1(X(l,l)) != 0.0) 
                       s[l] = csign(s[l],X(l,l));
                   zscal(n-l+1,zinv(s[l]),&(X(l,l)),1);
                   X(l,l) = Cadd_d(Complex_d(1.0,0.0), X(l,l));
                } 
                s[l] = DCmul_d(-1.0,s[l]);
            } 
            if (p >= lp1) {
                for (j = lp1;j <= p; j++) {
                    if (!((l > nct) || (cabs1(s[l]) == 0.0))) { 
                        /*
                         *  apply the transformation.
                         */
                        t = Cdiv_d(DCmul_d(-1.0,zdotc(n-l+1,&(X(l,l)),1,&(X(l,j)),1)),X(l,l));
                        zaxpy(n-l+1,t,&(X(l,l)),1,&(X(l,j)),1);
                    } 
                    /*
                     *       place the l-th row of x into  e for the
                     *       subsequent calculation of the row transformation.
                     */
                    e[j] = Conjg_d(X(l,j));
                }
            } 
            if (wantu && l <= nct) { 
                /*
                 *   place the transformation in u for subsequent back
                 *   multiplication.
                 */
                for (i = l;i <= n;i++)
                    U(i,l) = X(i,l);

            } 
            if (l <= nrt) { 
                /*
                 *   compute the l-th row transformation and place the
                 *   l-th super-diagonal in e(l).
                 */
                e[l] = Complex_d(dznrm2(p-l,&(e[lp1]),1),0.0);
                if (cabs1(e[l]) != 0.0) {
                    if (cabs1(e[lp1]) != 0.0) 
                        e[l] = csign(e[l],e[lp1]);
                    zscal(p-l,zinv(e[l]),&(e[lp1]),1);
                    e[lp1] = Cadd_d(Complex_d(1.0,0.0), e[lp1]);
                } 
                e[l] = DCmul_d(-1.0,Conjg_d(e[l]));
                if (lp1 <= n && cabs1(e[l]) != 0.0) { 
                    /*
                     *   apply the transformation.
                     */
                    for (i = lp1;i<= n;i++)
                        work[i] = Complex_d(0.0,0.0);
                    for (j = lp1;j<= p;j++)
                        zaxpy(n-l,e[j],&(X(lp1,j)),1,&(work[lp1]),1);
                    for (j = lp1;j<= p;j++)
                        zaxpy(n-l,Conjg_d(Cdiv_d(DCmul_d(-1.0,e[j]),e[lp1])),
                              &(work[lp1]),1,&(X(lp1,j)),1);
                } 
                if (wantv) 
                    /*
                     *   place the transformation in v for subsequent
                     *   back multiplication.
                     */
                    for (i = lp1;i<= p;i++)
                        V(i,l) = e[i];
            }
        }
    }
    /*
     *     set up the final bidiagonal matrix or order m.
     */
    m = min(p,n+1);
    nctp1 = nct + 1;
    nrtp1 = nrt + 1;
    if (nct < p) 
        s[nctp1] = X(nctp1,nctp1);
    if (n < m) 
        s[m] = Complex_d(0.0,0.0);
    if (nrtp1 < m) 
        e[nrtp1] = X(nrtp1,m);
    e[m] = Complex_d(0.0,0.0);
    /*
     *     if required, generate u.
     */
    if (wantu) {
        if (ncu >= nctp1) { 
            for (j = nctp1;j<= ncu;j++) {
                for (i = 1;i<= n;i++)
                    U(i,j) = Complex_d(0.0,0.0);
                U(j,j) = Complex_d(1.0,0.0);
            }
        } 
        if (nct >= 1) {
            for (ll = 1;ll<= nct;ll++) {
                l = nct - ll + 1;
                if (cabs1(s[l]) == 0.0) {
                    for (i = 1;i<= n;i++)
                        U(i,l) = Complex_d(0.0,0.0);
                    U(l,l) = Complex_d(1.0,0.0);
                }
                else {
                    lp1 = l + 1;
                    if (ncu >= lp1) {
                        for (j = lp1;j<=ncu;j++) {
                            t = DCmul_d(-1.0,Cdiv_d(zdotc(n-l+1,&(U(l,l)),1,&(U(l,j)),1),U(l,l)));
                            zaxpy(n-l+1,t,&(U(l,l)),1,&(U(l,j)),1);
                        }
                    } 
                    zscal(n-l+1,Complex_d(-1.0,0.0),&(U(l,l)),1);
                    U(l,l) = Cadd_d(Complex_d(1.0,0.0), U(l,l));
                    lm1 = l - 1;
                    if (lm1 >= 1) 
                        for (i = 1;i<= lm1;i++)
                            U(i,l) = Complex_d(0.0,0.0);
                }
            }
        }
    }
    /*  
     *     if it is required, generate v.
     */
    if (wantv) {
        for (ll = 1;ll<= p;ll++) {
            l = p - ll + 1;
            lp1 = l + 1;
            if ((l <= nrt) && (cabs1(e[l]) != 0.0)) {
                for (j = lp1;j<= p;j++) {
                    dcomplex dc,dd;
                    dc = zdotc(p-l,&(V(lp1,l)),1,&(V(lp1,j)),1);
                    dd = V(lp1,l);
                    t = Cdiv_d(DCmul_d(-1.0,dc),dd);
                    zaxpy(p-l,t,&(V(lp1,l)),1,&(V(lp1,j)),1);
                }
            }
            for (i = 1;i<= p;i++)
                V(i,l) = Complex_d(0.0,0.0);
            V(l,l) = Complex_d(1.0,0.0);
        }
    }
    /*
     *     transform s and e so that they are double precision.
     */
    for (i = 1;i<= m;i++) {
        if (cabs1(s[i]) != 0.0) { 
            t = Complex_d(Cabs_d(s[i]),0.0);
            r = Cdiv_d(s[i],t);
            s[i] = t;
            if (i < m) 
                e[i] = Cdiv_d(e[i],r);
            if (wantu) 
                zscal(n,r,&(U(1,i)),1);
        } 
        /*
         *     ...exit
         */
        if (i == m) 
            break; 
        if (cabs1(e[i]) != 0.0) {
            t = Complex_d(Cabs_d(e[i]),0.0);
            r = Cdiv_d(t,e[i]);
            e[i] = t;
            s[i+1] = Cmul_d(s[i+1],r);
            if (wantv) 
                zscal(p,r,&(V(1,i+1)),1);
        }
    } 
    /*
     *     main iteration loop for the singular values.
     */
    mm = m;
    iter = 0;
    while (1) { 
        /*
         *        quit if all the singular values have been found.
         *
         *     ...exit
         */
        if (m == 0) {
            break; 
        }
        /*
         *        if too many iterations have been performed, set
         *        flag and return.
         */
        if (iter >= maxit) {
            *info = m;
            /*
             *     ......exit
             */
            break; 
        } 
        /*
         *    this section of the program inspects for
         *    negligible elements in the s and e arrays.  on
         *    completion the variables kase and l are set as follows.
         *
         *       kase = 1     if s[m] and e(l-1) are negligible and l<m
         *       kase = 2     if s(l) is negligible and l<m
         *       kase = 3     if e(l-1) is negligible, l<m, and
         *                    s(l), ..., s[m] are not negligible (qr step).
         *       kase = 4     if e(m-1) is negligible (convergence).
         */
        for (ll = 1;ll<= m;ll++) {
            l = m - ll;
            if (l == 0) 
                break; 
            test = Cabs_d(s[l]) + Cabs_d(s[l+1]);
            ztest = test + Cabs_d(e[l]);
            if (ztest == test) { 
                e[l] = Complex_d(0.0,0.0);
                break;
            }
        }
        if (l == m - 1) {
            kase = 4;
        }
        else {
            lp1 = l + 1;
            mp1 = m + 1;
            for (lls = lp1;lls<= mp1;lls++) {
                ls = m - lls + lp1;
                if (ls == l) 
                    break;
                test = 0.0;
                if (ls != m) 
                    test += Cabs_d(e[ls]);
                if (ls != l + 1) 
                    test += Cabs_d(e[ls-1]);
                ztest = test + Cabs_d(s[ls]);
                if (ztest == test) { 
                    s[ls] = Complex_d(0.0,0.0);
                    break;
                } 
            } 
            if (ls == l) { 
                kase = 3;
            }
            else {
                if (ls == m) { 
                    kase = 1;
                }
                else {
                    kase = 2;
                    l = ls;
                }
            }  
        } 
        l++;
        /*
         *        perform the task indicated by kase.
         */
        switch (kase) {
        /*
         *        deflate negligible s[m].
         */
        case 1 :   
            mm1 = m - 1;
            f = e[m-1].r;
            e[m-1] = Complex_d(0.0,0.0);
            for (kk = l;kk<= mm1;kk++) {
                k = mm1 - kk + l;
                t1 = s[k].r;
                drotg(&t1,&f,&cs,&sn);
                s[k] = Complex_d(t1,0.0);
                if (k != l) {
                    f = -sn * e[k-1].r;
                    e[k-1] = DCmul_d(cs, e[k-1]);
                }
                if (wantv) 
                    zdrot(p,&(V(1,k)),1,&(V(1,m)),1,cs,sn);
            }
            break; 
            /*
             *        split at negligible s(l).
             */
        case 2 :   
            f = e[l-1].r;
            e[l-1] = Complex_d(0.0,0.0);
            for (k = l;k<= m;k++) {
                t1 = s[k].r;
                drotg(&t1,&f,&cs,&sn);
                s[k] = Complex_d(t1,0.0);
                f = -sn*e[k].r;
                e[k] = DCmul_d(cs,e[k]);
                if (wantu) 
                    zdrot(n,&(U(1,k)),1,&(U(1,l-1)),1,cs,sn);
            }
            break; 
            /*
             *        perform one qr step.
             */
        case 3 :   
            /*
             *        calculate the shift.
             */
            scale = dmax1(Cabs_d(s[m]),Cabs_d(s[m-1]),Cabs_d(e[m-1]),
                          Cabs_d(s[l]),Cabs_d(e[l]));
            sm    = s[m].r/scale;
            smm1  = s[m-1].r/scale;
            emm1  = e[m-1].r/scale;
            sl    = s[l].r/scale;
            el    = e[l].r/scale;
            b     = ((smm1 + sm)*(smm1 - sm) + emm1*emm1)/2.0;
            c     = pow(sm*emm1,2);
            shift = 0.0;
            if (b != 0.0 || c != 0.0) { 
                shift = sqrt(b*b+c);
                if (b < 0.0) 
                    shift = -shift;
                shift = c/(b + shift);
            } 
            f = (sl + sm)*(sl - sm) + shift;
            g = sl*el;
            /*
             *           chase zeros.
             */
            mm1 = m - 1;
            for (k = l;k<= mm1;k++) {
                drotg(&f,&g,&cs,&sn);
                if (k != l) 
                    e[k-1] = Complex_d(f,0.0);
                f = cs * s[k].r + sn * e[k].r;
                e[k] = Csub_d(DCmul_d(cs,e[k]), DCmul_d(sn,s[k]));
                g = sn * s[k+1].r;
                s[k+1] = DCmul_d(cs,s[k+1]);
                if (wantv) 
                    zdrot(p,&(V(1,k)),1,&(V(1,k+1)),1,cs,sn);
                drotg(&f,&g,&cs,&sn);
                s[k] = Complex_d(f,0.0);
                f = cs * e[k].r + sn * s[k+1].r;
                s[k+1] = Cadd_d(DCmul_d(-sn,e[k]), DCmul_d(cs,s[k+1]));
                g = sn * e[k+1].r;
                e[k+1] = DCmul_d(cs,e[k+1]);
                if (wantu && k < n)
                    zdrot(n,&(U(1,k)),1,&(U(1,k+1)),1,cs,sn);
            }
            e[m-1] = Complex_d(f,0.0);
            iter++;
            break; 
            /*
             *        convergence.
             */
        case 4 :    
            /*
             *           make the singular value  positive
             */
            if (s[l].r < 0.0) {
               s[l] = DCmul_d(-1.0,s[l]);
               if (wantv) 
                   zscal(p,Complex_d(-1.0,0.0),&(V(1,l)),1);
            }
            /*
             *           order the singular value.
             */
            while (l != mm) {
               if (s[l].r >= s[l+1].r) 
                   break;  
               t = s[l];
               s[l] = s[l+1];
               s[l+1] = t;
               if (wantv && l < p)
                   zswap(p,&(V(1,l)),1,&(V(1,l+1)),1);
               if (wantu && l < n)
                   zswap(n,&(U(1,l)),1,&(U(1,l+1)),1);
               l++;
            } 
            iter = 0;
            m--;
        } 
    }     
}

Exemplo n.º 2

0

Exibir arquivo

Arquivo: Lmdif.cpp Projeto: ANKELA/opensim-core

/*
*     **********
*
*     subroutine lmdif
*
*     the purpose of lmdif is to minimize the sum of the squares of
*     m nonlinear functions in n variables by a modification of
*     the levenberg-marquardt algorithm. the user must provide a
*     subroutine which calculates the functions. the jacobian is
*     then calculated by a forward-difference approximation.
*
*     the subroutine statement is
*
*   subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn,
*            diag,mode,factor,nprint,info,nfev,fjac,
*            ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4)
*
*     where
*
*   fcn is the name of the user-supplied subroutine which
*     calculates the functions. fcn must be declared
*     in an external statement in the user calling
*     program, and should be written as follows.
*
*     subroutine fcn(m,n,x,fvec,iflag)
*     integer m,n,iflag
*     double precision x(n),fvec(m)
*     ----------
*     calculate the functions at x and
*     return this vector in fvec.
*     ----------
*     return
*     end
*
*     the value of iflag should not be changed by fcn unless
*     the user wants to terminate execution of lmdif.
*     in this case set iflag to a negative integer.
*
*   m is a positive integer input variable set to the number
*     of functions.
*
*   n is a positive integer input variable set to the number
*     of variables. n must not exceed m.
*
*   x is an array of length n. on input x must contain
*     an initial estimate of the solution vector. on output x
*     contains the final estimate of the solution vector.
*
*   fvec is an output array of length m which contains
*     the functions evaluated at the output x.
*
*   ftol is a nonnegative input variable. termination
*     occurs when both the actual and predicted relative
*     reductions in the sum of squares are at most ftol.
*     therefore, ftol measures the relative error desired
*     in the sum of squares.
*
*   xtol is a nonnegative input variable. termination
*     occurs when the relative error between two consecutive
*     iterates is at most xtol. therefore, xtol measures the
*     relative error desired in the approximate solution.
*
*   gtol is a nonnegative input variable. termination
*     occurs when the cosine of the angle between fvec and
*     any column of the jacobian is at most gtol in absolute
*     value. therefore, gtol measures the orthogonality
*     desired between the function vector and the columns
*     of the jacobian.
*
*   maxfev is a positive integer input variable. termination
*     occurs when the number of calls to fcn is at least
*     maxfev by the end of an iteration.
*
*   epsfcn is an input variable used in determining a suitable
*     step length for the forward-difference approximation. this
*     approximation assumes that the relative errors in the
*     functions are of the order of epsfcn. if epsfcn is less
*     than the machine precision, it is assumed that the relative
*     errors in the functions are of the order of the machine
*     precision.
*
*   diag is an array of length n. if mode = 1 (see
*     below), diag is internally set. if mode = 2, diag
*     must contain positive entries that serve as
*     multiplicative scale factors for the variables.
*
*   mode is an integer input variable. if mode = 1, the
*     variables will be scaled internally. if mode = 2,
*     the scaling is specified by the input diag. other
*     values of mode are equivalent to mode = 1.
*
*   factor is a positive input variable used in determining the
*     initial step bound. this bound is set to the product of
*     factor and the euclidean norm of diag*x if nonzero, or else
*     to factor itself. in most cases factor should lie in the
*     interval (.1,100.). 100. is a generally recommended value.
*
*   nprint is an integer input variable that enables controlled
*     printing of iterates if it is positive. in this case,
*     fcn is called with iflag = 0 at the beginning of the first
*     iteration and every nprint iterations thereafter and
*     immediately prior to return, with x and fvec available
*     for printing. if nprint is not positive, no special calls
*     of fcn with iflag = 0 are made.
*
*   info is an integer output variable. if the user has
*     terminated execution, info is set to the (negative)
*     value of iflag. see description of fcn. otherwise,
*     info is set as follows.
*
*     info = 0  improper input parameters.
*
*     info = 1  both actual and predicted relative reductions
*           in the sum of squares are at most ftol.
*
*     info = 2  relative error between two consecutive iterates
*           is at most xtol.
*
*     info = 3  conditions for info = 1 and info = 2 both hold.
*
*     info = 4  the cosine of the angle between fvec and any
*           column of the jacobian is at most gtol in
*           absolute value.
*
*     info = 5  number of calls to fcn has reached or
*           exceeded maxfev.
*
*     info = 6  ftol is too small. no further reduction in
*           the sum of squares is possible.
*
*     info = 7  xtol is too small. no further improvement in
*           the approximate solution x is possible.
*
*     info = 8  gtol is too small. fvec is orthogonal to the
*           columns of the jacobian to machine precision.
*
*   nfev is an integer output variable set to the number of
*     calls to fcn.
*
*   fjac is an output m by n array. the upper n by n submatrix
*     of fjac contains an upper triangular matrix r with
*     diagonal elements of nonincreasing magnitude such that
*
*        t     t       t
*       p *(jac *jac)*p = r *r,
*
*     where p is a permutation matrix and jac is the final
*     calculated jacobian. column j of p is column ipvt(j)
*     (see below) of the identity matrix. the lower trapezoidal
*     part of fjac contains information generated during
*     the computation of r.
*
*   ldfjac is a positive integer input variable not less than m
*     which specifies the leading dimension of the array fjac.
*
*   ipvt is an integer output array of length n. ipvt
*     defines a permutation matrix p such that jac*p = q*r,
*     where jac is the final calculated jacobian, q is
*     orthogonal (not stored), and r is upper triangular
*     with diagonal elements of nonincreasing magnitude.
*     column j of p is column ipvt(j) of the identity matrix.
*
*   qtf is an output array of length n which contains
*     the first n elements of the vector (q transpose)*fvec.
*
*   wa1, wa2, and wa3 are work arrays of length n.
*
*   wa4 is a work array of length m.
*
*     subprograms called
*
*   user-supplied ...... fcn
*
*   minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac
*
*   fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod
*
*     argonne national laboratory. minpack project. march 1980.
*     burton s. garbow, kenneth e. hillstrom, jorge j. more
*
*     **********
*/
void lmdif_C(
  void (*fcn)(int, int, double[], double[], int *, void *),
  int    m,
  int    n,
  double x[],
  double fvec[],
  double ftol,
  double xtol,
  double gtol,
  int    maxfev,
  double epsfcn,
  double diag[],
  int    mode,
  double factor,
  int    nprint,
  int   *info,
  int   *nfev,
  double fjac[],
  int    ldfjac,
  int    ipvt[],
  double qtf[],
  double wa1[],
  double wa2[],
  double wa3[],
  double wa4[],
  void *data)
{
   int i;
   int iflag;
   int ij;
   int jj;
   int iter;
   int j;
   int l;
   double actred;
   double delta;
   double dirder;
   double fnorm;
   double fnorm1;
   double gnorm;
   double par;
   double pnorm;
   double prered;
   double ratio;
   double sum;
   double temp;
   double temp1;
   double temp2;
   double temp3;
   double xnorm;
   static double one = 1.0;
   static double p1 = 0.1;
   static double p5 = 0.5;
   static double p25 = 0.25;
   static double p75 = 0.75;
   static double p0001 = 1.0e-4;
   static double zero = 0.0;
   //static double p05 = 0.05;
   
   *info = 0;
   iflag = 0;
   *nfev = 0;
   
   /*
    *     check the input parameters for errors.
    */
   if ((n <= 0) || (m < n) || (ldfjac < m) || (ftol < zero)
      || (xtol < zero) || (gtol < zero) || (maxfev <= 0)
      || (factor <= zero))
      goto L300;
   
   if (mode == 2)
   {
      /* scaling by diag[] */
      for (j=0; j<n; j++)
      {
         if (diag[j] <= 0.0)
            goto L300;
      }   
   }
   
#ifdef BUG
   printf( "lmdif\n" );
#endif
   
   /* evaluate the function at the starting point
    * and calculate its norm.
    */
   iflag = 1;
   fcn(m,n,x,fvec,&iflag, data);
   *nfev = 1;
   if (iflag < 0)
      goto L300;
   
   fnorm = enorm(m,fvec);
   /* initialize levenberg-marquardt parameter and iteration counter. */
   par = zero;
   iter = 1;

   /* beginning of the outer loop. */
L30:
   
   /* calculate the jacobian matrix. */
   iflag = 2;
   fdjac2(fcn, m,n,x,fvec,fjac,ldfjac,&iflag,epsfcn,wa4, data);
   // commented out DKB   
   //    *nfev += n;
   if (iflag < 0)
      goto L300;

   /* if requested, call fcn to enable printing of iterates. */
   if (nprint > 0)
   {
      iflag = 0;
      if (mod(iter-1,nprint) == 0)
      {
         fcn(m,n,x,fvec,&iflag, data);
         if (iflag < 0)
            goto L300;        
     //    printf( "fnorm %.15e\n", enorm(m,fvec));
      }
   }
   /* compute the qr factorization of the jacobian. */
   qrfac(m,n,fjac,ldfjac,1,ipvt,n,wa1,wa2,wa3);
//   for (j = 0; j < n; j++)
//   {
//      printf("wa1[%d] = %e\n", j, wa1[j]);
//      printf("wa2[%d] = %e\n", j, wa2[j]);
//      printf("wa3[%d] = %e\n", j, wa3[j]);
//   }
   /* on the first iteration and if mode is 1, scale according
    * to the norms of the columns of the initial jacobian.
    */
   if (iter == 1)
   {
 //     printf("iter = 1, mode = %d\n", mode);
      if (mode != 2)
      {
         for (j=0; j<n; j++)
         {
            diag[j] = wa2[j];
            if (wa2[j] == zero)
               diag[j] = one;
         }
      }
      
      /* on the first iteration, calculate the norm of the scaled x
       * and initialize the step bound delta.
       */
      for (j=0; j<n; j++)
         wa3[j] = diag[j] * x[j];
      
      xnorm = enorm(n,wa3);
      delta = factor*xnorm;
     // printf("iter1: xnorm = %e, delta = %e\n", xnorm, delta);
      //dkb
      if (fabs(delta) <= 1e-4)
//      if (delta == zero)
         delta = factor;
   }
   
   /* form (q transpose)*fvec and store the first n components in qtf. */
   for (i=0; i<m; i++)
      wa4[i] = fvec[i];
   
   jj = 0;
   for (j=0; j<n; j++)
   {
      temp3 = fjac[jj];
      if (temp3 != zero)
      {
         sum = zero;
         ij = jj;
         for (i=j; i<m; i++)
         {
            sum += fjac[ij] * wa4[i];
            ij += 1;    /* fjac[i+m*j] */
         }
         temp = -sum / temp3;
         ij = jj;
         for (i=j; i<m; i++)
         {
            wa4[i] += fjac[ij] * temp;
            ij += 1;    /* fjac[i+m*j] */
         }
      }
      fjac[jj] = wa1[j];
      jj += m+1;  /* fjac[j+m*j] */
      qtf[j] = wa4[j];
   }
   
   /* compute the norm of the scaled gradient. */
   gnorm = zero;
   if (fnorm != zero)
   {
      jj = 0;
      for (j=0; j<n; j++)
      {
         l = ipvt[j];
         if (wa2[l] != zero)
         {
            sum = zero;
            ij = jj;
            for (i=0; i<=j; i++)
            {
               sum += fjac[ij]*(qtf[i]/fnorm);
               ij += 1; /* fjac[i+m*j] */
            }
            gnorm = dmax1(gnorm,fabs(sum/wa2[l]));
         }
         jj += m;
      }
   }
   
   /* test for convergence of the gradient norm. */
   if (gnorm <= gtol)
      *info = 4;
   if (*info != 0)
      goto L300;
   
//for (j = 0; j < n; j++)
//   printf("diag[%d] = %e, wa2[%d] = %e\n", j, diag[j], j, wa2[j]);
   /* rescale if necessary. */
   if (mode != 2)
   {
      for (j=0; j<n; j++)
         diag[j] = dmax1(diag[j],wa2[j]);
   }
   
   /* beginning of the inner loop. */
L200:
   
   /* determine the levenberg-marquardt parameter. */
   lmpar(n,fjac,ldfjac,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4);
   /* store the direction p and x + p. calculate the norm of p. */
   for (j=0; j<n; j++)
   {
      wa1[j] = -wa1[j];
      wa2[j] = x[j] + wa1[j];
      wa3[j] = diag[j]*wa1[j];
      //printf("wa2[%d] = %e + %e = %e\n", j, x[j], wa1[j], wa2[j]);
   }
   pnorm = enorm(n,wa3);
   /* on the first iteration, adjust the initial step bound. */
   if (iter == 1)
      delta = dmin1(delta,pnorm);
      
   /* evaluate the function at x + p and calculate its norm. */
   iflag = 1;
   //printf("evaluate at:\n");
   //for (j=0; j<n; j++)
   //   printf("wa2[%d] = %e\n", j, wa2[j]);
   fcn(m,n,wa2,wa4,&iflag, data);
   *nfev += 1;
   if (iflag < 0)
      goto L300;
   
   fnorm1 = enorm(m,wa4);
   
#ifdef BUG 
   printf( "pnorm %.10e  fnorm1 %.10e\n", pnorm, fnorm1 );
#endif
   
   /* compute the scaled actual reduction. */
   actred = -one;
   if ((p1*fnorm1) < fnorm)
   {
      temp = fnorm1/fnorm;
      actred = one - temp * temp;
   }
   /* compute the scaled predicted reduction and
    * the scaled directional derivative.
    */
   jj = 0;
   for (j=0; j<n; j++)
   {
      wa3[j] = zero;
      l = ipvt[j];
      temp = wa1[l];
      ij = jj;
      for (i=0; i<=j; i++)
      {
         wa3[i] += fjac[ij]*temp;
         ij += 1; /* fjac[i+m*j] */
      }
      jj += m;
   }
   temp1 = enorm(n,wa3)/fnorm;
   temp2 = (sqrt(par)*pnorm)/fnorm;
   prered = temp1*temp1 + (temp2*temp2)/p5;
   dirder = -(temp1*temp1 + temp2*temp2);
   /* compute the ratio of the actual to the predicted reduction. */
   ratio = zero;
   if (prered != zero)
      ratio = actred/prered;
   /* update the step bound. */
   if (ratio <= p25)
   {
      if (actred >= zero)
         temp = p5;
      else
         temp = p5*dirder/(dirder + p5*actred);
      if (((p1*fnorm1) >= fnorm) || (temp < p1))
         temp = p1;
      
      delta = temp*dmin1(delta,pnorm/p1);
      par = par/temp;
   }
   else
   {
      if ((par == zero) || (ratio >= p75))
      {
         delta = pnorm/p5;
         par = p5*par;
      }
   }
   /* test for successful iteration. */
   if (ratio >= p0001)
   {
      /* successful iteration. update x, fvec, and their norms. */
      for (j=0; j<n; j++)
      {
         x[j] = wa2[j];
         wa2[j] = diag[j]*x[j];
      }
      for (i=0; i<m; i++)
         fvec[i] = wa4[i];
      
      xnorm = enorm(n,wa2);
      fnorm = fnorm1;
      iter += 1;
   }
   /* tests for convergence. */
   if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one))
   {
      *info = 1;
   }
   
   if (delta <= xtol*xnorm)
      *info = 2;
   
   if ((fabs(actred) <= ftol) && (prered <= ftol)
      && (p5*ratio <= one) && (*info == 2))
   {
      *info = 3;
   }
   
   if (*info != 0)
      goto L300;
   
   /* tests for termination and stringent tolerances. */
   if (*nfev >= maxfev)
      *info = 5;
   
   if ((fabs(actred) <= MACHEP) && (prered <= MACHEP) && (p5*ratio <= one))
   {
      *info = 6;
   }
   
   if (delta <= MACHEP*xnorm)
      *info = 7;
   
   if (gnorm <= MACHEP)
      *info = 8;
   
   if (*info != 0)
      goto L300;
   
   /* end of the inner loop. repeat if iteration unsuccessful. */
   if (ratio < p0001)
      goto L200;
   
   /*  end of the outer loop. */
   goto L30;
   
L300:

   /* termination, either normal or user imposed. */
   if (iflag < 0)
      *info = iflag;
   
   iflag = 0;
   if (nprint > 0)
      fcn(m,n,x,fvec,&iflag, data);
}

Exemplo n.º 3

0

Exibir arquivo

Arquivo: Lmdif.cpp Projeto: ANKELA/opensim-core

/*     **********
*
*     subroutine lmpar
*
*     given an m by n matrix a, an n by n nonsingular diagonal
*     matrix d, an m-vector b, and a positive number delta,
*     the problem is to determine a value for the parameter
*     par such that if x solves the system
*
*       a*x = b ,     sqrt(par)*d*x = 0 ,
*
*     in the least squares sense, and dxnorm is the euclidean
*     norm of d*x, then either par is zero and
*
*       (dxnorm-delta) .le. 0.1*delta ,
*
*     or par is positive and
*
*       abs(dxnorm-delta) .le. 0.1*delta .
*
*     this subroutine completes the solution of the problem
*     if it is provided with the necessary information from the
*     qr factorization, with column pivoting, of a. that is, if
*     a*p = q*r, where p is a permutation matrix, q has orthogonal
*     columns, and r is an upper triangular matrix with diagonal
*     elements of nonincreasing magnitude, then lmpar expects
*     the full upper triangle of r, the permutation matrix p,
*     and the first n components of (q transpose)*b. on output
*     lmpar also provides an upper triangular matrix s such that
*
*        t   t           t
*       p *(a *a + par*d*d)*p = s *s .
*
*     s is employed within lmpar and may be of separate interest.
*
*     only a few iterations are generally needed for convergence
*     of the algorithm. if, however, the limit of 10 iterations
*     is reached, then the output par will contain the best
*     value obtained so far.
*
*     the subroutine statement is
*
*   subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag,
*            wa1,wa2)
*
*     where
*
*   n is a positive integer input variable set to the order of r.
*
*   r is an n by n array. on input the full upper triangle
*     must contain the full upper triangle of the matrix r.
*     on output the full upper triangle is unaltered, and the
*     strict lower triangle contains the strict upper triangle
*     (transposed) of the upper triangular matrix s.
*
*   ldr is a positive integer input variable not less than n
*     which specifies the leading dimension of the array r.
*
*   ipvt is an integer input array of length n which defines the
*     permutation matrix p such that a*p = q*r. column j of p
*     is column ipvt(j) of the identity matrix.
*
*   diag is an input array of length n which must contain the
*     diagonal elements of the matrix d.
*
*   qtb is an input array of length n which must contain the first
*     n elements of the vector (q transpose)*b.
*
*   delta is a positive input variable which specifies an upper
*     bound on the euclidean norm of d*x.
*
*   par is a nonnegative variable. on input par contains an
*     initial estimate of the levenberg-marquardt parameter.
*     on output par contains the final estimate.
*
*   x is an output array of length n which contains the least
*     squares solution of the system a*x = b, sqrt(par)*d*x = 0,
*     for the output par.
*
*   sdiag is an output array of length n which contains the
*     diagonal elements of the upper triangular matrix s.
*
*   wa1 and wa2 are work arrays of length n.
*
*     subprograms called
*
*   minpack-supplied ... dpmpar,enorm,qrsolv
*
*   fortran-supplied ... dabs,dmax1,dmin1,dsqrt
*
*     argonne national laboratory. minpack project. march 1980.
*     burton s. garbow, kenneth e. hillstrom, jorge j. more
*
*     **********
*/
void lmpar(
  int n,
  double r[],
  int ldr,
  int ipvt[],
  double diag[],
  double qtb[],
  double delta,
  double *par,
  double x[],
  double sdiag[],
  double wa1[],
  double wa2[])
{
   int i;
   int iter;
   int ij;
   int jj;
   int j;
   int jm1;
   int jp1;
   int k;
   int l;
   int nsing;
   double dxnorm;
   double fp;
   double gnorm;
   double parc;
   double parl;
   double paru;
   double sum;
   double temp;
   static double zero = 0.0;
   //static double one = 1.0;
   static double p1 = 0.1;
   static double p001 = 0.001;
   
#ifdef BUG
   printf( "lmpar\n" );
#endif
   
   /* compute and store in x the gauss-newton direction. if the
    * jacobian is rank-deficient, obtain a least squares solution.
    */
   nsing = n;
   jj = 0;
   for (j=0; j<n; j++)
   {
      wa1[j] = qtb[j];
      if ((r[jj] == zero) && (nsing == n))
         nsing = j;
      
      if (nsing < n)
         wa1[j] = zero;
      
      jj += ldr+1; /* [j+ldr*j] */
   }
   
#ifdef BUG
   printf( "nsing %d ", nsing );
#endif
   
   if (nsing >= 1)
   {
      for (k=0; k<nsing; k++)
      {
         j = nsing - k - 1;
         wa1[j] = wa1[j]/r[j+ldr*j];
         temp = wa1[j];
         jm1 = j - 1;
         if (jm1 >= 0)
         {
            ij = ldr * j;
            for (i=0; i<=jm1; i++)
            {
               wa1[i] -= r[ij]*temp;
               ij += 1;
            }
         }
      }
   }
   
   for (j=0; j<n; j++)
   {
      l = ipvt[j];
      x[l] = wa1[j];
   }
   /* initialize the iteration counter.
    * evaluate the function at the origin, and test
    * for acceptance of the gauss-newton direction.
    */
   iter = 0;
   for (j=0; j<n; j++)
      wa2[j] = diag[j]*x[j];
   
   dxnorm = enorm(n,wa2);
   fp = dxnorm - delta;
   if (fp <= p1*delta)
   {
#ifdef BUG
      printf( "going to L220\n" );
#endif
      
      goto L220;
   }
   
   /* if the jacobian is not rank deficient, the newton
    * step provides a lower bound, parl, for the zero of
    * the function. otherwise set this bound to zero.
    */
   parl = zero;
   if (nsing >= n)
   {
      for (j=0; j<n; j++)
      {
         l = ipvt[j];
         wa1[j] = diag[l]*(wa2[l]/dxnorm);
      }
      jj = 0;
      for (j=0; j<n; j++)
      {
         sum = zero;
         jm1 = j - 1;
         if (jm1 >= 0)
         {
            ij = jj;
            for (i=0; i<=jm1; i++)
            {
               sum += r[ij]*wa1[i];
               ij += 1;
            }
         }
         wa1[j] = (wa1[j] - sum)/r[j+ldr*j];
         jj += ldr; /* [i+ldr*j] */
      }
      temp = enorm(n,wa1);
      parl = ((fp/delta)/temp)/temp;
   }
   /* calculate an upper bound, paru, for the zero of the function. */
   jj = 0;
   for (j=0; j<n; j++)
   {
      sum = zero;
      ij = jj;
      for (i=0; i<=j; i++)
      {
         sum += r[ij]*qtb[i];
         ij += 1;
      }
      l = ipvt[j];
      wa1[j] = sum/diag[l];
      jj += ldr; /* [i+ldr*j] */
   }
   gnorm = enorm(n,wa1);
   paru = gnorm/delta;
   if(paru == zero)
      paru = DWARF/dmin1(delta,p1);
   
   /* if the input par lies outside of the interval (parl,paru),
    * set par to the closer endpoint.
    */
   *par = dmax1(*par,parl);
   *par = dmin1(*par,paru);
   if (*par == zero)
      *par = gnorm/dxnorm;
   
#ifdef BUG
   printf( "parl %.4e  par %.4e  paru %.4e\n", parl, *par, paru );
#endif
   
   /* beginning of an iteration. */
   
L150:
   
   iter += 1;
   /*  evaluate the function at the current value of par. */
   if (*par == zero)
      *par = dmax1(DWARF,p001*paru);
   
   temp = sqrt(*par);
   for (j=0; j<n; j++)
      wa1[j] = temp*diag[j];

   qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2);
   for (j=0; j<n; j++)
      wa2[j] = diag[j]*x[j];
   
   dxnorm = enorm(n,wa2);
   temp = fp;
   fp = dxnorm - delta;
   /* if the function is small enough, accept the current value
    * of par. also test for the exceptional cases where parl
    * is zero or the number of iterations has reached 10.
    */
   if ((fabs(fp) <= p1*delta)
      || ((parl == zero) && (fp <= temp) && (temp < zero))
      || (iter == 10))
   {
      goto L220;
   }
   /* compute the newton correction. */
   for (j=0; j<n; j++)
   {
      l = ipvt[j];
      wa1[j] = diag[l]*(wa2[l]/dxnorm);
   }
   jj = 0;
   for (j=0; j<n; j++)
   {
      wa1[j] = wa1[j]/sdiag[j];
      temp = wa1[j];
      jp1 = j + 1;
      if (jp1 < n)
      {
         ij = jp1 + jj;
         for (i=jp1; i<n; i++)
         {
            wa1[i] -= r[ij]*temp;
            ij += 1; /* [i+ldr*j] */
         }
      }
      jj += ldr; /* ldr*j */
   }
   temp = enorm(n,wa1);
   parc = ((fp/delta)/temp)/temp;
   
   /* depending on the sign of the function, update parl or paru. */
   if (fp > zero)
      parl = dmax1(parl, *par);
   
   if (fp < zero)
      paru = dmin1(paru, *par);
   
   /* compute an improved estimate for par. */
   *par = dmax1(parl, *par + parc);
   
   /* end of an iteration. */
   
   goto L150;
   
L220:
   
/* termination. */
   
   if (iter == 0)
      *par = zero;
}

Exemplo n.º 4

0

Exibir arquivo

Arquivo: Lmdif.cpp Projeto: ANKELA/opensim-core

/*
*     **********
*
*     subroutine qrfac
*
*     this subroutine uses householder transformations with column
*     pivoting (optional) to compute a qr factorization of the
*     m by n matrix a. that is, qrfac determines an orthogonal
*     matrix q, a permutation matrix p, and an upper trapezoidal
*     matrix r with diagonal elements of nonincreasing magnitude,
*     such that a*p = q*r. the householder transformation for
*     column k, k = 1,2,...,min(m,n), is of the form
*
*               t
*       i - (1/u(k))*u*u
*
*     where u has zeros in the first k-1 positions. the form of
*     this transformation and the method of pivoting first
*     appeared in the corresponding linpack subroutine.
*
*     the subroutine statement is
*
*   subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa)
*
*     where
*
*   m is a positive integer input variable set to the number
*     of rows of a.
*
*   n is a positive integer input variable set to the number
*     of columns of a.
*
*   a is an m by n array. on input a contains the matrix for
*     which the qr factorization is to be computed. on output
*     the strict upper trapezoidal part of a contains the strict
*     upper trapezoidal part of r, and the lower trapezoidal
*     part of a contains a factored form of q (the non-trivial
*     elements of the u vectors described above).
*
*   lda is a positive integer input variable not less than m
*     which specifies the leading dimension of the array a.
*
*   pivot is a logical input variable. if pivot is set true,
*     then column pivoting is enforced. if pivot is set false,
*     then no column pivoting is done.
*
*   ipvt is an integer output array of length lipvt. ipvt
*     defines the permutation matrix p such that a*p = q*r.
*     column j of p is column ipvt(j) of the identity matrix.
*     if pivot is false, ipvt is not referenced.
*
*   lipvt is a positive integer input variable. if pivot is false,
*     then lipvt may be as small as 1. if pivot is true, then
*     lipvt must be at least n.
*
*   rdiag is an output array of length n which contains the
*     diagonal elements of r.
*
*   acnorm is an output array of length n which contains the
*     norms of the corresponding columns of the input matrix a.
*     if this information is not needed, then acnorm can coincide
*     with rdiag.
*
*   wa is a work array of length n. if pivot is false, then wa
*     can coincide with rdiag.
*
*     subprograms called
*
*   minpack-supplied ... dpmpar,enorm
*
*   fortran-supplied ... dmax1,dsqrt,min0
*
*     argonne national laboratory. minpack project. march 1980.
*     burton s. garbow, kenneth e. hillstrom, jorge j. more
*
*     **********
*/
void qrfac(int m,
           int n,
           double a[],
           int lda,
           int pivot,
           int ipvt[],
           int lipvt,
           double rdiag[],
           double acnorm[],
           double wa[])
{
   int i;
   int ij;
   int jj;
   int j;
   int jp1;
   int k;
   int kmax;
   int minmn;
   double ajnorm;
   double sum;
   double temp;
   static double zero = 0.0;
   static double one = 1.0;
   static double p05 = 0.05;
   
   /* compute the initial column norms and initialize several arrays. */
   //printf("\nqrfac\n");
   ij = 0;
   for (j=0; j<n; j++)
   {
      acnorm[j] = enorm(m,&a[ij]);
      rdiag[j] = acnorm[j];
      wa[j] = rdiag[j];
      if (pivot != 0)
         ipvt[j] = j;
      
      ij += m; /* m*j */
     // printf("acnorm[%d] = %e\n", j, acnorm[j]);
     // printf("rdiag[%d] = %e\n", j, rdiag[j]);
   }
   
#ifdef BUG
   printf( "qrfac\n" );
#endif
   
   /* reduce a to r with householder transformations. */
   
   minmn = min0(m,n);
   for (j=0; j<minmn; j++)
   {
      if (pivot == 0)
         goto L40;
      
      /* bring the column of largest norm into the pivot position. */
      kmax = j;
      for (k=j; k<n; k++)
      {
         if (rdiag[k] > rdiag[kmax])
            kmax = k;
      }
      if (kmax == j)
         goto L40;
      
      ij = m * j;
      jj = m * kmax;
      for (i=0; i<m; i++)
      {
         temp = a[ij]; /* [i+m*j] */
         a[ij] = a[jj]; /* [i+m*kmax] */
         a[jj] = temp;
         ij += 1;
         jj += 1;
      }
      rdiag[kmax] = rdiag[j];
      wa[kmax] = wa[j];
      k = ipvt[j];
      ipvt[j] = ipvt[kmax];
      ipvt[kmax] = k;
      
L40:
      
      /* compute the householder transformation to reduce the
       * j-th column of a to a multiple of the j-th unit vector.
       */
      jj = j + m*j;
      ajnorm = enorm(m-j,&a[jj]);
      if (ajnorm == zero)
         goto L100;
      
      if (a[jj] < zero)
         ajnorm = -ajnorm;
      
      ij = jj;
      for (i=j; i<m; i++)
      {
         a[ij] /= ajnorm;
         ij += 1; /* [i+m*j] */
      }
      a[jj] += one;
      
      /* apply the transformation to the remaining columns
       * and update the norms.
       */
      jp1 = j + 1;
      if (jp1 < n)
      {
         for (k=jp1; k<n; k++)
         {
            sum = zero;
            ij = j + m*k;
            jj = j + m*j;
            for (i=j; i<m; i++)
            {
               sum += a[jj]*a[ij];
               ij += 1; /* [i+m*k] */
               jj += 1; /* [i+m*j] */
            }
            temp = sum/a[j+m*j];
            ij = j + m*k;
            jj = j + m*j;
            for (i=j; i<m; i++)
            {
               a[ij] -= temp*a[jj];
               ij += 1; /* [i+m*k] */
               jj += 1; /* [i+m*j] */
            }
            if ((pivot != 0) && (rdiag[k] != zero))
            {
               temp = a[j+m*k]/rdiag[k];
               temp = dmax1(zero, one-temp*temp);
               rdiag[k] *= sqrt(temp);
               temp = rdiag[k]/wa[k];
               if ((p05*temp*temp) <= MACHEP)
               {
                  rdiag[k] = enorm(m-j-1,&a[jp1+m*k]);
                  wa[k] = rdiag[k];
               }
            }
         }
      }
      
L100:
      
      rdiag[j] = -ajnorm;
   }
}