void zsvdc(dcomplex *x, int ldx, int n, int p, dcomplex *s, dcomplex *e, dcomplex *u, int ldu, dcomplex *v, int ldv, dcomplex *work, int job, int *info) { /* complex*16 x(ldx,1),s(1),e(1),u(ldu,1),v(ldv,1),work(1) */ int i,iter,j,jobu,k,kase,kk,l,ll,lls,lm1,lp1,ls,lu,m,maxit, mm,mm1,mp1,nct,nctp1,ncu,nrt,nrtp1; dcomplex t,r; double b,c,cs,el,emm1,f,g,scale,shift,sl,sm,sn, smm1,t1,test,ztest; int wantu,wantv; /* * adjust arrays */ s--; e--; work--; /* * set the maximum number of iterations. */ maxit = 30; /* * determine what is to be computed. */ wantu = FALSE; wantv = FALSE; jobu = (job % 100)/10; ncu = n; if (jobu > 1) ncu = min(n,p); if (jobu != 0) wantu = TRUE; if ((job % 10) != 0) wantv = TRUE; /* * reduce x to bidiagonal form, storing the diagonal elements * in s and the super-diagonal elements in e. */ *info = 0; nct = min(n-1,p); nrt = min(p-2,n); nrt = max(0,nrt); lu = max(nct,nrt); if (lu >= 1) { for (l = 1;l <= lu;l++) { lp1 = l + 1; if (l <= nct) { /* * compute the transformation for the l-th column and * place the l-th diagonal in s(l). */ s[l] = Complex_d(dznrm2(n-l+1,&(X(l,l)),1), 0.0); if (cabs1(s[l]) != 0.0) { if (cabs1(X(l,l)) != 0.0) s[l] = csign(s[l],X(l,l)); zscal(n-l+1,zinv(s[l]),&(X(l,l)),1); X(l,l) = Cadd_d(Complex_d(1.0,0.0), X(l,l)); } s[l] = DCmul_d(-1.0,s[l]); } if (p >= lp1) { for (j = lp1;j <= p; j++) { if (!((l > nct) || (cabs1(s[l]) == 0.0))) { /* * apply the transformation. */ t = Cdiv_d(DCmul_d(-1.0,zdotc(n-l+1,&(X(l,l)),1,&(X(l,j)),1)),X(l,l)); zaxpy(n-l+1,t,&(X(l,l)),1,&(X(l,j)),1); } /* * place the l-th row of x into e for the * subsequent calculation of the row transformation. */ e[j] = Conjg_d(X(l,j)); } } if (wantu && l <= nct) { /* * place the transformation in u for subsequent back * multiplication. */ for (i = l;i <= n;i++) U(i,l) = X(i,l); } if (l <= nrt) { /* * compute the l-th row transformation and place the * l-th super-diagonal in e(l). */ e[l] = Complex_d(dznrm2(p-l,&(e[lp1]),1),0.0); if (cabs1(e[l]) != 0.0) { if (cabs1(e[lp1]) != 0.0) e[l] = csign(e[l],e[lp1]); zscal(p-l,zinv(e[l]),&(e[lp1]),1); e[lp1] = Cadd_d(Complex_d(1.0,0.0), e[lp1]); } e[l] = DCmul_d(-1.0,Conjg_d(e[l])); if (lp1 <= n && cabs1(e[l]) != 0.0) { /* * apply the transformation. */ for (i = lp1;i<= n;i++) work[i] = Complex_d(0.0,0.0); for (j = lp1;j<= p;j++) zaxpy(n-l,e[j],&(X(lp1,j)),1,&(work[lp1]),1); for (j = lp1;j<= p;j++) zaxpy(n-l,Conjg_d(Cdiv_d(DCmul_d(-1.0,e[j]),e[lp1])), &(work[lp1]),1,&(X(lp1,j)),1); } if (wantv) /* * place the transformation in v for subsequent * back multiplication. */ for (i = lp1;i<= p;i++) V(i,l) = e[i]; } } } /* * set up the final bidiagonal matrix or order m. */ m = min(p,n+1); nctp1 = nct + 1; nrtp1 = nrt + 1; if (nct < p) s[nctp1] = X(nctp1,nctp1); if (n < m) s[m] = Complex_d(0.0,0.0); if (nrtp1 < m) e[nrtp1] = X(nrtp1,m); e[m] = Complex_d(0.0,0.0); /* * if required, generate u. */ if (wantu) { if (ncu >= nctp1) { for (j = nctp1;j<= ncu;j++) { for (i = 1;i<= n;i++) U(i,j) = Complex_d(0.0,0.0); U(j,j) = Complex_d(1.0,0.0); } } if (nct >= 1) { for (ll = 1;ll<= nct;ll++) { l = nct - ll + 1; if (cabs1(s[l]) == 0.0) { for (i = 1;i<= n;i++) U(i,l) = Complex_d(0.0,0.0); U(l,l) = Complex_d(1.0,0.0); } else { lp1 = l + 1; if (ncu >= lp1) { for (j = lp1;j<=ncu;j++) { t = DCmul_d(-1.0,Cdiv_d(zdotc(n-l+1,&(U(l,l)),1,&(U(l,j)),1),U(l,l))); zaxpy(n-l+1,t,&(U(l,l)),1,&(U(l,j)),1); } } zscal(n-l+1,Complex_d(-1.0,0.0),&(U(l,l)),1); U(l,l) = Cadd_d(Complex_d(1.0,0.0), U(l,l)); lm1 = l - 1; if (lm1 >= 1) for (i = 1;i<= lm1;i++) U(i,l) = Complex_d(0.0,0.0); } } } } /* * if it is required, generate v. */ if (wantv) { for (ll = 1;ll<= p;ll++) { l = p - ll + 1; lp1 = l + 1; if ((l <= nrt) && (cabs1(e[l]) != 0.0)) { for (j = lp1;j<= p;j++) { dcomplex dc,dd; dc = zdotc(p-l,&(V(lp1,l)),1,&(V(lp1,j)),1); dd = V(lp1,l); t = Cdiv_d(DCmul_d(-1.0,dc),dd); zaxpy(p-l,t,&(V(lp1,l)),1,&(V(lp1,j)),1); } } for (i = 1;i<= p;i++) V(i,l) = Complex_d(0.0,0.0); V(l,l) = Complex_d(1.0,0.0); } } /* * transform s and e so that they are double precision. */ for (i = 1;i<= m;i++) { if (cabs1(s[i]) != 0.0) { t = Complex_d(Cabs_d(s[i]),0.0); r = Cdiv_d(s[i],t); s[i] = t; if (i < m) e[i] = Cdiv_d(e[i],r); if (wantu) zscal(n,r,&(U(1,i)),1); } /* * ...exit */ if (i == m) break; if (cabs1(e[i]) != 0.0) { t = Complex_d(Cabs_d(e[i]),0.0); r = Cdiv_d(t,e[i]); e[i] = t; s[i+1] = Cmul_d(s[i+1],r); if (wantv) zscal(p,r,&(V(1,i+1)),1); } } /* * main iteration loop for the singular values. */ mm = m; iter = 0; while (1) { /* * quit if all the singular values have been found. * * ...exit */ if (m == 0) { break; } /* * if too many iterations have been performed, set * flag and return. */ if (iter >= maxit) { *info = m; /* * ......exit */ break; } /* * this section of the program inspects for * negligible elements in the s and e arrays. on * completion the variables kase and l are set as follows. * * kase = 1 if s[m] and e(l-1) are negligible and l<m * kase = 2 if s(l) is negligible and l<m * kase = 3 if e(l-1) is negligible, l<m, and * s(l), ..., s[m] are not negligible (qr step). * kase = 4 if e(m-1) is negligible (convergence). */ for (ll = 1;ll<= m;ll++) { l = m - ll; if (l == 0) break; test = Cabs_d(s[l]) + Cabs_d(s[l+1]); ztest = test + Cabs_d(e[l]); if (ztest == test) { e[l] = Complex_d(0.0,0.0); break; } } if (l == m - 1) { kase = 4; } else { lp1 = l + 1; mp1 = m + 1; for (lls = lp1;lls<= mp1;lls++) { ls = m - lls + lp1; if (ls == l) break; test = 0.0; if (ls != m) test += Cabs_d(e[ls]); if (ls != l + 1) test += Cabs_d(e[ls-1]); ztest = test + Cabs_d(s[ls]); if (ztest == test) { s[ls] = Complex_d(0.0,0.0); break; } } if (ls == l) { kase = 3; } else { if (ls == m) { kase = 1; } else { kase = 2; l = ls; } } } l++; /* * perform the task indicated by kase. */ switch (kase) { /* * deflate negligible s[m]. */ case 1 : mm1 = m - 1; f = e[m-1].r; e[m-1] = Complex_d(0.0,0.0); for (kk = l;kk<= mm1;kk++) { k = mm1 - kk + l; t1 = s[k].r; drotg(&t1,&f,&cs,&sn); s[k] = Complex_d(t1,0.0); if (k != l) { f = -sn * e[k-1].r; e[k-1] = DCmul_d(cs, e[k-1]); } if (wantv) zdrot(p,&(V(1,k)),1,&(V(1,m)),1,cs,sn); } break; /* * split at negligible s(l). */ case 2 : f = e[l-1].r; e[l-1] = Complex_d(0.0,0.0); for (k = l;k<= m;k++) { t1 = s[k].r; drotg(&t1,&f,&cs,&sn); s[k] = Complex_d(t1,0.0); f = -sn*e[k].r; e[k] = DCmul_d(cs,e[k]); if (wantu) zdrot(n,&(U(1,k)),1,&(U(1,l-1)),1,cs,sn); } break; /* * perform one qr step. */ case 3 : /* * calculate the shift. */ scale = dmax1(Cabs_d(s[m]),Cabs_d(s[m-1]),Cabs_d(e[m-1]), Cabs_d(s[l]),Cabs_d(e[l])); sm = s[m].r/scale; smm1 = s[m-1].r/scale; emm1 = e[m-1].r/scale; sl = s[l].r/scale; el = e[l].r/scale; b = ((smm1 + sm)*(smm1 - sm) + emm1*emm1)/2.0; c = pow(sm*emm1,2); shift = 0.0; if (b != 0.0 || c != 0.0) { shift = sqrt(b*b+c); if (b < 0.0) shift = -shift; shift = c/(b + shift); } f = (sl + sm)*(sl - sm) + shift; g = sl*el; /* * chase zeros. */ mm1 = m - 1; for (k = l;k<= mm1;k++) { drotg(&f,&g,&cs,&sn); if (k != l) e[k-1] = Complex_d(f,0.0); f = cs * s[k].r + sn * e[k].r; e[k] = Csub_d(DCmul_d(cs,e[k]), DCmul_d(sn,s[k])); g = sn * s[k+1].r; s[k+1] = DCmul_d(cs,s[k+1]); if (wantv) zdrot(p,&(V(1,k)),1,&(V(1,k+1)),1,cs,sn); drotg(&f,&g,&cs,&sn); s[k] = Complex_d(f,0.0); f = cs * e[k].r + sn * s[k+1].r; s[k+1] = Cadd_d(DCmul_d(-sn,e[k]), DCmul_d(cs,s[k+1])); g = sn * e[k+1].r; e[k+1] = DCmul_d(cs,e[k+1]); if (wantu && k < n) zdrot(n,&(U(1,k)),1,&(U(1,k+1)),1,cs,sn); } e[m-1] = Complex_d(f,0.0); iter++; break; /* * convergence. */ case 4 : /* * make the singular value positive */ if (s[l].r < 0.0) { s[l] = DCmul_d(-1.0,s[l]); if (wantv) zscal(p,Complex_d(-1.0,0.0),&(V(1,l)),1); } /* * order the singular value. */ while (l != mm) { if (s[l].r >= s[l+1].r) break; t = s[l]; s[l] = s[l+1]; s[l+1] = t; if (wantv && l < p) zswap(p,&(V(1,l)),1,&(V(1,l+1)),1); if (wantu && l < n) zswap(n,&(U(1,l)),1,&(U(1,l+1)),1); l++; } iter = 0; m--; } } }
/* * ********** * * subroutine lmdif * * the purpose of lmdif is to minimize the sum of the squares of * m nonlinear functions in n variables by a modification of * the levenberg-marquardt algorithm. the user must provide a * subroutine which calculates the functions. the jacobian is * then calculated by a forward-difference approximation. * * the subroutine statement is * * subroutine lmdif(fcn,m,n,x,fvec,ftol,xtol,gtol,maxfev,epsfcn, * diag,mode,factor,nprint,info,nfev,fjac, * ldfjac,ipvt,qtf,wa1,wa2,wa3,wa4) * * where * * fcn is the name of the user-supplied subroutine which * calculates the functions. fcn must be declared * in an external statement in the user calling * program, and should be written as follows. * * subroutine fcn(m,n,x,fvec,iflag) * integer m,n,iflag * double precision x(n),fvec(m) * ---------- * calculate the functions at x and * return this vector in fvec. * ---------- * return * end * * the value of iflag should not be changed by fcn unless * the user wants to terminate execution of lmdif. * in this case set iflag to a negative integer. * * m is a positive integer input variable set to the number * of functions. * * n is a positive integer input variable set to the number * of variables. n must not exceed m. * * x is an array of length n. on input x must contain * an initial estimate of the solution vector. on output x * contains the final estimate of the solution vector. * * fvec is an output array of length m which contains * the functions evaluated at the output x. * * ftol is a nonnegative input variable. termination * occurs when both the actual and predicted relative * reductions in the sum of squares are at most ftol. * therefore, ftol measures the relative error desired * in the sum of squares. * * xtol is a nonnegative input variable. termination * occurs when the relative error between two consecutive * iterates is at most xtol. therefore, xtol measures the * relative error desired in the approximate solution. * * gtol is a nonnegative input variable. termination * occurs when the cosine of the angle between fvec and * any column of the jacobian is at most gtol in absolute * value. therefore, gtol measures the orthogonality * desired between the function vector and the columns * of the jacobian. * * maxfev is a positive integer input variable. termination * occurs when the number of calls to fcn is at least * maxfev by the end of an iteration. * * epsfcn is an input variable used in determining a suitable * step length for the forward-difference approximation. this * approximation assumes that the relative errors in the * functions are of the order of epsfcn. if epsfcn is less * than the machine precision, it is assumed that the relative * errors in the functions are of the order of the machine * precision. * * diag is an array of length n. if mode = 1 (see * below), diag is internally set. if mode = 2, diag * must contain positive entries that serve as * multiplicative scale factors for the variables. * * mode is an integer input variable. if mode = 1, the * variables will be scaled internally. if mode = 2, * the scaling is specified by the input diag. other * values of mode are equivalent to mode = 1. * * factor is a positive input variable used in determining the * initial step bound. this bound is set to the product of * factor and the euclidean norm of diag*x if nonzero, or else * to factor itself. in most cases factor should lie in the * interval (.1,100.). 100. is a generally recommended value. * * nprint is an integer input variable that enables controlled * printing of iterates if it is positive. in this case, * fcn is called with iflag = 0 at the beginning of the first * iteration and every nprint iterations thereafter and * immediately prior to return, with x and fvec available * for printing. if nprint is not positive, no special calls * of fcn with iflag = 0 are made. * * info is an integer output variable. if the user has * terminated execution, info is set to the (negative) * value of iflag. see description of fcn. otherwise, * info is set as follows. * * info = 0 improper input parameters. * * info = 1 both actual and predicted relative reductions * in the sum of squares are at most ftol. * * info = 2 relative error between two consecutive iterates * is at most xtol. * * info = 3 conditions for info = 1 and info = 2 both hold. * * info = 4 the cosine of the angle between fvec and any * column of the jacobian is at most gtol in * absolute value. * * info = 5 number of calls to fcn has reached or * exceeded maxfev. * * info = 6 ftol is too small. no further reduction in * the sum of squares is possible. * * info = 7 xtol is too small. no further improvement in * the approximate solution x is possible. * * info = 8 gtol is too small. fvec is orthogonal to the * columns of the jacobian to machine precision. * * nfev is an integer output variable set to the number of * calls to fcn. * * fjac is an output m by n array. the upper n by n submatrix * of fjac contains an upper triangular matrix r with * diagonal elements of nonincreasing magnitude such that * * t t t * p *(jac *jac)*p = r *r, * * where p is a permutation matrix and jac is the final * calculated jacobian. column j of p is column ipvt(j) * (see below) of the identity matrix. the lower trapezoidal * part of fjac contains information generated during * the computation of r. * * ldfjac is a positive integer input variable not less than m * which specifies the leading dimension of the array fjac. * * ipvt is an integer output array of length n. ipvt * defines a permutation matrix p such that jac*p = q*r, * where jac is the final calculated jacobian, q is * orthogonal (not stored), and r is upper triangular * with diagonal elements of nonincreasing magnitude. * column j of p is column ipvt(j) of the identity matrix. * * qtf is an output array of length n which contains * the first n elements of the vector (q transpose)*fvec. * * wa1, wa2, and wa3 are work arrays of length n. * * wa4 is a work array of length m. * * subprograms called * * user-supplied ...... fcn * * minpack-supplied ... dpmpar,enorm,fdjac2,lmpar,qrfac * * fortran-supplied ... dabs,dmax1,dmin1,dsqrt,mod * * argonne national laboratory. minpack project. march 1980. * burton s. garbow, kenneth e. hillstrom, jorge j. more * * ********** */ void lmdif_C( void (*fcn)(int, int, double[], double[], int *, void *), int m, int n, double x[], double fvec[], double ftol, double xtol, double gtol, int maxfev, double epsfcn, double diag[], int mode, double factor, int nprint, int *info, int *nfev, double fjac[], int ldfjac, int ipvt[], double qtf[], double wa1[], double wa2[], double wa3[], double wa4[], void *data) { int i; int iflag; int ij; int jj; int iter; int j; int l; double actred; double delta; double dirder; double fnorm; double fnorm1; double gnorm; double par; double pnorm; double prered; double ratio; double sum; double temp; double temp1; double temp2; double temp3; double xnorm; static double one = 1.0; static double p1 = 0.1; static double p5 = 0.5; static double p25 = 0.25; static double p75 = 0.75; static double p0001 = 1.0e-4; static double zero = 0.0; //static double p05 = 0.05; *info = 0; iflag = 0; *nfev = 0; /* * check the input parameters for errors. */ if ((n <= 0) || (m < n) || (ldfjac < m) || (ftol < zero) || (xtol < zero) || (gtol < zero) || (maxfev <= 0) || (factor <= zero)) goto L300; if (mode == 2) { /* scaling by diag[] */ for (j=0; j<n; j++) { if (diag[j] <= 0.0) goto L300; } } #ifdef BUG printf( "lmdif\n" ); #endif /* evaluate the function at the starting point * and calculate its norm. */ iflag = 1; fcn(m,n,x,fvec,&iflag, data); *nfev = 1; if (iflag < 0) goto L300; fnorm = enorm(m,fvec); /* initialize levenberg-marquardt parameter and iteration counter. */ par = zero; iter = 1; /* beginning of the outer loop. */ L30: /* calculate the jacobian matrix. */ iflag = 2; fdjac2(fcn, m,n,x,fvec,fjac,ldfjac,&iflag,epsfcn,wa4, data); // commented out DKB // *nfev += n; if (iflag < 0) goto L300; /* if requested, call fcn to enable printing of iterates. */ if (nprint > 0) { iflag = 0; if (mod(iter-1,nprint) == 0) { fcn(m,n,x,fvec,&iflag, data); if (iflag < 0) goto L300; // printf( "fnorm %.15e\n", enorm(m,fvec)); } } /* compute the qr factorization of the jacobian. */ qrfac(m,n,fjac,ldfjac,1,ipvt,n,wa1,wa2,wa3); // for (j = 0; j < n; j++) // { // printf("wa1[%d] = %e\n", j, wa1[j]); // printf("wa2[%d] = %e\n", j, wa2[j]); // printf("wa3[%d] = %e\n", j, wa3[j]); // } /* on the first iteration and if mode is 1, scale according * to the norms of the columns of the initial jacobian. */ if (iter == 1) { // printf("iter = 1, mode = %d\n", mode); if (mode != 2) { for (j=0; j<n; j++) { diag[j] = wa2[j]; if (wa2[j] == zero) diag[j] = one; } } /* on the first iteration, calculate the norm of the scaled x * and initialize the step bound delta. */ for (j=0; j<n; j++) wa3[j] = diag[j] * x[j]; xnorm = enorm(n,wa3); delta = factor*xnorm; // printf("iter1: xnorm = %e, delta = %e\n", xnorm, delta); //dkb if (fabs(delta) <= 1e-4) // if (delta == zero) delta = factor; } /* form (q transpose)*fvec and store the first n components in qtf. */ for (i=0; i<m; i++) wa4[i] = fvec[i]; jj = 0; for (j=0; j<n; j++) { temp3 = fjac[jj]; if (temp3 != zero) { sum = zero; ij = jj; for (i=j; i<m; i++) { sum += fjac[ij] * wa4[i]; ij += 1; /* fjac[i+m*j] */ } temp = -sum / temp3; ij = jj; for (i=j; i<m; i++) { wa4[i] += fjac[ij] * temp; ij += 1; /* fjac[i+m*j] */ } } fjac[jj] = wa1[j]; jj += m+1; /* fjac[j+m*j] */ qtf[j] = wa4[j]; } /* compute the norm of the scaled gradient. */ gnorm = zero; if (fnorm != zero) { jj = 0; for (j=0; j<n; j++) { l = ipvt[j]; if (wa2[l] != zero) { sum = zero; ij = jj; for (i=0; i<=j; i++) { sum += fjac[ij]*(qtf[i]/fnorm); ij += 1; /* fjac[i+m*j] */ } gnorm = dmax1(gnorm,fabs(sum/wa2[l])); } jj += m; } } /* test for convergence of the gradient norm. */ if (gnorm <= gtol) *info = 4; if (*info != 0) goto L300; //for (j = 0; j < n; j++) // printf("diag[%d] = %e, wa2[%d] = %e\n", j, diag[j], j, wa2[j]); /* rescale if necessary. */ if (mode != 2) { for (j=0; j<n; j++) diag[j] = dmax1(diag[j],wa2[j]); } /* beginning of the inner loop. */ L200: /* determine the levenberg-marquardt parameter. */ lmpar(n,fjac,ldfjac,ipvt,diag,qtf,delta,&par,wa1,wa2,wa3,wa4); /* store the direction p and x + p. calculate the norm of p. */ for (j=0; j<n; j++) { wa1[j] = -wa1[j]; wa2[j] = x[j] + wa1[j]; wa3[j] = diag[j]*wa1[j]; //printf("wa2[%d] = %e + %e = %e\n", j, x[j], wa1[j], wa2[j]); } pnorm = enorm(n,wa3); /* on the first iteration, adjust the initial step bound. */ if (iter == 1) delta = dmin1(delta,pnorm); /* evaluate the function at x + p and calculate its norm. */ iflag = 1; //printf("evaluate at:\n"); //for (j=0; j<n; j++) // printf("wa2[%d] = %e\n", j, wa2[j]); fcn(m,n,wa2,wa4,&iflag, data); *nfev += 1; if (iflag < 0) goto L300; fnorm1 = enorm(m,wa4); #ifdef BUG printf( "pnorm %.10e fnorm1 %.10e\n", pnorm, fnorm1 ); #endif /* compute the scaled actual reduction. */ actred = -one; if ((p1*fnorm1) < fnorm) { temp = fnorm1/fnorm; actred = one - temp * temp; } /* compute the scaled predicted reduction and * the scaled directional derivative. */ jj = 0; for (j=0; j<n; j++) { wa3[j] = zero; l = ipvt[j]; temp = wa1[l]; ij = jj; for (i=0; i<=j; i++) { wa3[i] += fjac[ij]*temp; ij += 1; /* fjac[i+m*j] */ } jj += m; } temp1 = enorm(n,wa3)/fnorm; temp2 = (sqrt(par)*pnorm)/fnorm; prered = temp1*temp1 + (temp2*temp2)/p5; dirder = -(temp1*temp1 + temp2*temp2); /* compute the ratio of the actual to the predicted reduction. */ ratio = zero; if (prered != zero) ratio = actred/prered; /* update the step bound. */ if (ratio <= p25) { if (actred >= zero) temp = p5; else temp = p5*dirder/(dirder + p5*actred); if (((p1*fnorm1) >= fnorm) || (temp < p1)) temp = p1; delta = temp*dmin1(delta,pnorm/p1); par = par/temp; } else { if ((par == zero) || (ratio >= p75)) { delta = pnorm/p5; par = p5*par; } } /* test for successful iteration. */ if (ratio >= p0001) { /* successful iteration. update x, fvec, and their norms. */ for (j=0; j<n; j++) { x[j] = wa2[j]; wa2[j] = diag[j]*x[j]; } for (i=0; i<m; i++) fvec[i] = wa4[i]; xnorm = enorm(n,wa2); fnorm = fnorm1; iter += 1; } /* tests for convergence. */ if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one)) { *info = 1; } if (delta <= xtol*xnorm) *info = 2; if ((fabs(actred) <= ftol) && (prered <= ftol) && (p5*ratio <= one) && (*info == 2)) { *info = 3; } if (*info != 0) goto L300; /* tests for termination and stringent tolerances. */ if (*nfev >= maxfev) *info = 5; if ((fabs(actred) <= MACHEP) && (prered <= MACHEP) && (p5*ratio <= one)) { *info = 6; } if (delta <= MACHEP*xnorm) *info = 7; if (gnorm <= MACHEP) *info = 8; if (*info != 0) goto L300; /* end of the inner loop. repeat if iteration unsuccessful. */ if (ratio < p0001) goto L200; /* end of the outer loop. */ goto L30; L300: /* termination, either normal or user imposed. */ if (iflag < 0) *info = iflag; iflag = 0; if (nprint > 0) fcn(m,n,x,fvec,&iflag, data); }
/* ********** * * subroutine lmpar * * given an m by n matrix a, an n by n nonsingular diagonal * matrix d, an m-vector b, and a positive number delta, * the problem is to determine a value for the parameter * par such that if x solves the system * * a*x = b , sqrt(par)*d*x = 0 , * * in the least squares sense, and dxnorm is the euclidean * norm of d*x, then either par is zero and * * (dxnorm-delta) .le. 0.1*delta , * * or par is positive and * * abs(dxnorm-delta) .le. 0.1*delta . * * this subroutine completes the solution of the problem * if it is provided with the necessary information from the * qr factorization, with column pivoting, of a. that is, if * a*p = q*r, where p is a permutation matrix, q has orthogonal * columns, and r is an upper triangular matrix with diagonal * elements of nonincreasing magnitude, then lmpar expects * the full upper triangle of r, the permutation matrix p, * and the first n components of (q transpose)*b. on output * lmpar also provides an upper triangular matrix s such that * * t t t * p *(a *a + par*d*d)*p = s *s . * * s is employed within lmpar and may be of separate interest. * * only a few iterations are generally needed for convergence * of the algorithm. if, however, the limit of 10 iterations * is reached, then the output par will contain the best * value obtained so far. * * the subroutine statement is * * subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, * wa1,wa2) * * where * * n is a positive integer input variable set to the order of r. * * r is an n by n array. on input the full upper triangle * must contain the full upper triangle of the matrix r. * on output the full upper triangle is unaltered, and the * strict lower triangle contains the strict upper triangle * (transposed) of the upper triangular matrix s. * * ldr is a positive integer input variable not less than n * which specifies the leading dimension of the array r. * * ipvt is an integer input array of length n which defines the * permutation matrix p such that a*p = q*r. column j of p * is column ipvt(j) of the identity matrix. * * diag is an input array of length n which must contain the * diagonal elements of the matrix d. * * qtb is an input array of length n which must contain the first * n elements of the vector (q transpose)*b. * * delta is a positive input variable which specifies an upper * bound on the euclidean norm of d*x. * * par is a nonnegative variable. on input par contains an * initial estimate of the levenberg-marquardt parameter. * on output par contains the final estimate. * * x is an output array of length n which contains the least * squares solution of the system a*x = b, sqrt(par)*d*x = 0, * for the output par. * * sdiag is an output array of length n which contains the * diagonal elements of the upper triangular matrix s. * * wa1 and wa2 are work arrays of length n. * * subprograms called * * minpack-supplied ... dpmpar,enorm,qrsolv * * fortran-supplied ... dabs,dmax1,dmin1,dsqrt * * argonne national laboratory. minpack project. march 1980. * burton s. garbow, kenneth e. hillstrom, jorge j. more * * ********** */ void lmpar( int n, double r[], int ldr, int ipvt[], double diag[], double qtb[], double delta, double *par, double x[], double sdiag[], double wa1[], double wa2[]) { int i; int iter; int ij; int jj; int j; int jm1; int jp1; int k; int l; int nsing; double dxnorm; double fp; double gnorm; double parc; double parl; double paru; double sum; double temp; static double zero = 0.0; //static double one = 1.0; static double p1 = 0.1; static double p001 = 0.001; #ifdef BUG printf( "lmpar\n" ); #endif /* compute and store in x the gauss-newton direction. if the * jacobian is rank-deficient, obtain a least squares solution. */ nsing = n; jj = 0; for (j=0; j<n; j++) { wa1[j] = qtb[j]; if ((r[jj] == zero) && (nsing == n)) nsing = j; if (nsing < n) wa1[j] = zero; jj += ldr+1; /* [j+ldr*j] */ } #ifdef BUG printf( "nsing %d ", nsing ); #endif if (nsing >= 1) { for (k=0; k<nsing; k++) { j = nsing - k - 1; wa1[j] = wa1[j]/r[j+ldr*j]; temp = wa1[j]; jm1 = j - 1; if (jm1 >= 0) { ij = ldr * j; for (i=0; i<=jm1; i++) { wa1[i] -= r[ij]*temp; ij += 1; } } } } for (j=0; j<n; j++) { l = ipvt[j]; x[l] = wa1[j]; } /* initialize the iteration counter. * evaluate the function at the origin, and test * for acceptance of the gauss-newton direction. */ iter = 0; for (j=0; j<n; j++) wa2[j] = diag[j]*x[j]; dxnorm = enorm(n,wa2); fp = dxnorm - delta; if (fp <= p1*delta) { #ifdef BUG printf( "going to L220\n" ); #endif goto L220; } /* if the jacobian is not rank deficient, the newton * step provides a lower bound, parl, for the zero of * the function. otherwise set this bound to zero. */ parl = zero; if (nsing >= n) { for (j=0; j<n; j++) { l = ipvt[j]; wa1[j] = diag[l]*(wa2[l]/dxnorm); } jj = 0; for (j=0; j<n; j++) { sum = zero; jm1 = j - 1; if (jm1 >= 0) { ij = jj; for (i=0; i<=jm1; i++) { sum += r[ij]*wa1[i]; ij += 1; } } wa1[j] = (wa1[j] - sum)/r[j+ldr*j]; jj += ldr; /* [i+ldr*j] */ } temp = enorm(n,wa1); parl = ((fp/delta)/temp)/temp; } /* calculate an upper bound, paru, for the zero of the function. */ jj = 0; for (j=0; j<n; j++) { sum = zero; ij = jj; for (i=0; i<=j; i++) { sum += r[ij]*qtb[i]; ij += 1; } l = ipvt[j]; wa1[j] = sum/diag[l]; jj += ldr; /* [i+ldr*j] */ } gnorm = enorm(n,wa1); paru = gnorm/delta; if(paru == zero) paru = DWARF/dmin1(delta,p1); /* if the input par lies outside of the interval (parl,paru), * set par to the closer endpoint. */ *par = dmax1(*par,parl); *par = dmin1(*par,paru); if (*par == zero) *par = gnorm/dxnorm; #ifdef BUG printf( "parl %.4e par %.4e paru %.4e\n", parl, *par, paru ); #endif /* beginning of an iteration. */ L150: iter += 1; /* evaluate the function at the current value of par. */ if (*par == zero) *par = dmax1(DWARF,p001*paru); temp = sqrt(*par); for (j=0; j<n; j++) wa1[j] = temp*diag[j]; qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2); for (j=0; j<n; j++) wa2[j] = diag[j]*x[j]; dxnorm = enorm(n,wa2); temp = fp; fp = dxnorm - delta; /* if the function is small enough, accept the current value * of par. also test for the exceptional cases where parl * is zero or the number of iterations has reached 10. */ if ((fabs(fp) <= p1*delta) || ((parl == zero) && (fp <= temp) && (temp < zero)) || (iter == 10)) { goto L220; } /* compute the newton correction. */ for (j=0; j<n; j++) { l = ipvt[j]; wa1[j] = diag[l]*(wa2[l]/dxnorm); } jj = 0; for (j=0; j<n; j++) { wa1[j] = wa1[j]/sdiag[j]; temp = wa1[j]; jp1 = j + 1; if (jp1 < n) { ij = jp1 + jj; for (i=jp1; i<n; i++) { wa1[i] -= r[ij]*temp; ij += 1; /* [i+ldr*j] */ } } jj += ldr; /* ldr*j */ } temp = enorm(n,wa1); parc = ((fp/delta)/temp)/temp; /* depending on the sign of the function, update parl or paru. */ if (fp > zero) parl = dmax1(parl, *par); if (fp < zero) paru = dmin1(paru, *par); /* compute an improved estimate for par. */ *par = dmax1(parl, *par + parc); /* end of an iteration. */ goto L150; L220: /* termination. */ if (iter == 0) *par = zero; }
/* * ********** * * subroutine qrfac * * this subroutine uses householder transformations with column * pivoting (optional) to compute a qr factorization of the * m by n matrix a. that is, qrfac determines an orthogonal * matrix q, a permutation matrix p, and an upper trapezoidal * matrix r with diagonal elements of nonincreasing magnitude, * such that a*p = q*r. the householder transformation for * column k, k = 1,2,...,min(m,n), is of the form * * t * i - (1/u(k))*u*u * * where u has zeros in the first k-1 positions. the form of * this transformation and the method of pivoting first * appeared in the corresponding linpack subroutine. * * the subroutine statement is * * subroutine qrfac(m,n,a,lda,pivot,ipvt,lipvt,rdiag,acnorm,wa) * * where * * m is a positive integer input variable set to the number * of rows of a. * * n is a positive integer input variable set to the number * of columns of a. * * a is an m by n array. on input a contains the matrix for * which the qr factorization is to be computed. on output * the strict upper trapezoidal part of a contains the strict * upper trapezoidal part of r, and the lower trapezoidal * part of a contains a factored form of q (the non-trivial * elements of the u vectors described above). * * lda is a positive integer input variable not less than m * which specifies the leading dimension of the array a. * * pivot is a logical input variable. if pivot is set true, * then column pivoting is enforced. if pivot is set false, * then no column pivoting is done. * * ipvt is an integer output array of length lipvt. ipvt * defines the permutation matrix p such that a*p = q*r. * column j of p is column ipvt(j) of the identity matrix. * if pivot is false, ipvt is not referenced. * * lipvt is a positive integer input variable. if pivot is false, * then lipvt may be as small as 1. if pivot is true, then * lipvt must be at least n. * * rdiag is an output array of length n which contains the * diagonal elements of r. * * acnorm is an output array of length n which contains the * norms of the corresponding columns of the input matrix a. * if this information is not needed, then acnorm can coincide * with rdiag. * * wa is a work array of length n. if pivot is false, then wa * can coincide with rdiag. * * subprograms called * * minpack-supplied ... dpmpar,enorm * * fortran-supplied ... dmax1,dsqrt,min0 * * argonne national laboratory. minpack project. march 1980. * burton s. garbow, kenneth e. hillstrom, jorge j. more * * ********** */ void qrfac(int m, int n, double a[], int lda, int pivot, int ipvt[], int lipvt, double rdiag[], double acnorm[], double wa[]) { int i; int ij; int jj; int j; int jp1; int k; int kmax; int minmn; double ajnorm; double sum; double temp; static double zero = 0.0; static double one = 1.0; static double p05 = 0.05; /* compute the initial column norms and initialize several arrays. */ //printf("\nqrfac\n"); ij = 0; for (j=0; j<n; j++) { acnorm[j] = enorm(m,&a[ij]); rdiag[j] = acnorm[j]; wa[j] = rdiag[j]; if (pivot != 0) ipvt[j] = j; ij += m; /* m*j */ // printf("acnorm[%d] = %e\n", j, acnorm[j]); // printf("rdiag[%d] = %e\n", j, rdiag[j]); } #ifdef BUG printf( "qrfac\n" ); #endif /* reduce a to r with householder transformations. */ minmn = min0(m,n); for (j=0; j<minmn; j++) { if (pivot == 0) goto L40; /* bring the column of largest norm into the pivot position. */ kmax = j; for (k=j; k<n; k++) { if (rdiag[k] > rdiag[kmax]) kmax = k; } if (kmax == j) goto L40; ij = m * j; jj = m * kmax; for (i=0; i<m; i++) { temp = a[ij]; /* [i+m*j] */ a[ij] = a[jj]; /* [i+m*kmax] */ a[jj] = temp; ij += 1; jj += 1; } rdiag[kmax] = rdiag[j]; wa[kmax] = wa[j]; k = ipvt[j]; ipvt[j] = ipvt[kmax]; ipvt[kmax] = k; L40: /* compute the householder transformation to reduce the * j-th column of a to a multiple of the j-th unit vector. */ jj = j + m*j; ajnorm = enorm(m-j,&a[jj]); if (ajnorm == zero) goto L100; if (a[jj] < zero) ajnorm = -ajnorm; ij = jj; for (i=j; i<m; i++) { a[ij] /= ajnorm; ij += 1; /* [i+m*j] */ } a[jj] += one; /* apply the transformation to the remaining columns * and update the norms. */ jp1 = j + 1; if (jp1 < n) { for (k=jp1; k<n; k++) { sum = zero; ij = j + m*k; jj = j + m*j; for (i=j; i<m; i++) { sum += a[jj]*a[ij]; ij += 1; /* [i+m*k] */ jj += 1; /* [i+m*j] */ } temp = sum/a[j+m*j]; ij = j + m*k; jj = j + m*j; for (i=j; i<m; i++) { a[ij] -= temp*a[jj]; ij += 1; /* [i+m*k] */ jj += 1; /* [i+m*j] */ } if ((pivot != 0) && (rdiag[k] != zero)) { temp = a[j+m*k]/rdiag[k]; temp = dmax1(zero, one-temp*temp); rdiag[k] *= sqrt(temp); temp = rdiag[k]/wa[k]; if ((p05*temp*temp) <= MACHEP) { rdiag[k] = enorm(m-j-1,&a[jp1+m*k]); wa[k] = rdiag[k]; } } } } L100: rdiag[j] = -ajnorm; } }