/*
 * leastsq -- print an m x n system and solve it through qrdcmp()/qrsolv().
 *
 * a  m row pointers, each to n floats (the coefficient matrix).
 * m  number of rows.
 * n  number of columns.
 * b  right-hand side with m entries.
 *
 * The system is echoed to stdout one row per line, then two scratch
 * vectors of length n are allocated and handed to qrdcmp() and qrsolv().
 * NOTE(review): qrsolv() presumably leaves the solution in b and the
 * factorisation in a -- confirm against their definitions.
 *
 * Returns 0 on success and -1 when qrdcmp() returns non-zero.  The
 * scratch vectors are released on both paths.
 */
int leastsq(float **a, int m, int n, float *b)
{
    int i, j;
    int status = -1;
    float *c;
    float *d;

    printf("leastsq(%d x %d)\n", m, n);
    for (i = 0; i < m; i++) {
        printf("[ ");
        for (j = 0; j < n; j++) {
            printf(" %2.4f", a[i][j]);
        }
        printf(" ]");
        printf(" = %g\n", b[i]);
    }

    c = malloc(sizeof(float) * n);
    assert(c != NULL);
    d = malloc(sizeof(float) * n);
    assert(d != NULL);

    /* Only solve when the decomposition succeeded. */
    if (!qrdcmp(a, m, n, c, d)) {
        qrsolv(a, n, c, d, b);
        status = 0;
    }

    free(c);
    free(d);
    return status;
}
/* ********** * * subroutine lmpar * * given an m by n matrix a, an n by n nonsingular diagonal * matrix d, an m-vector b, and a positive number delta, * the problem is to determine a value for the parameter * par such that if x solves the system * * a*x = b , sqrt(par)*d*x = 0 , * * in the least squares sense, and dxnorm is the euclidean * norm of d*x, then either par is zero and * * (dxnorm-delta) .le. 0.1*delta , * * or par is positive and * * abs(dxnorm-delta) .le. 0.1*delta . * * this subroutine completes the solution of the problem * if it is provided with the necessary information from the * qr factorization, with column pivoting, of a. that is, if * a*p = q*r, where p is a permutation matrix, q has orthogonal * columns, and r is an upper triangular matrix with diagonal * elements of nonincreasing magnitude, then lmpar expects * the full upper triangle of r, the permutation matrix p, * and the first n components of (q transpose)*b. on output * lmpar also provides an upper triangular matrix s such that * * t t t * p *(a *a + par*d*d)*p = s *s . * * s is employed within lmpar and may be of separate interest. * * only a few iterations are generally needed for convergence * of the algorithm. if, however, the limit of 10 iterations * is reached, then the output par will contain the best * value obtained so far. * * the subroutine statement is * * subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, * wa1,wa2) * * where * * n is a positive integer input variable set to the order of r. * * r is an n by n array. on input the full upper triangle * must contain the full upper triangle of the matrix r. * on output the full upper triangle is unaltered, and the * strict lower triangle contains the strict upper triangle * (transposed) of the upper triangular matrix s. * * ldr is a positive integer input variable not less than n * which specifies the leading dimension of the array r. 
* * ipvt is an integer input array of length n which defines the * permutation matrix p such that a*p = q*r. column j of p * is column ipvt(j) of the identity matrix. * * diag is an input array of length n which must contain the * diagonal elements of the matrix d. * * qtb is an input array of length n which must contain the first * n elements of the vector (q transpose)*b. * * delta is a positive input variable which specifies an upper * bound on the euclidean norm of d*x. * * par is a nonnegative variable. on input par contains an * initial estimate of the levenberg-marquardt parameter. * on output par contains the final estimate. * * x is an output array of length n which contains the least * squares solution of the system a*x = b, sqrt(par)*d*x = 0, * for the output par. * * sdiag is an output array of length n which contains the * diagonal elements of the upper triangular matrix s. * * wa1 and wa2 are work arrays of length n. * * subprograms called * * minpack-supplied ... dpmpar,enorm,qrsolv * * fortran-supplied ... dabs,dmax1,dmin1,dsqrt * * argonne national laboratory. minpack project. march 1980. * burton s. garbow, kenneth e. hillstrom, jorge j. more * * ********** */ void lmpar( int n, double r[], int ldr, int ipvt[], double diag[], double qtb[], double delta, double *par, double x[], double sdiag[], double wa1[], double wa2[]) { int i; int iter; int ij; int jj; int j; int jm1; int jp1; int k; int l; int nsing; double dxnorm; double fp; double gnorm; double parc; double parl; double paru; double sum; double temp; static double zero = 0.0; //static double one = 1.0; static double p1 = 0.1; static double p001 = 0.001; #ifdef BUG printf( "lmpar\n" ); #endif /* compute and store in x the gauss-newton direction. if the * jacobian is rank-deficient, obtain a least squares solution. 
*/ nsing = n; jj = 0; for (j=0; j<n; j++) { wa1[j] = qtb[j]; if ((r[jj] == zero) && (nsing == n)) nsing = j; if (nsing < n) wa1[j] = zero; jj += ldr+1; /* [j+ldr*j] */ } #ifdef BUG printf( "nsing %d ", nsing ); #endif if (nsing >= 1) { for (k=0; k<nsing; k++) { j = nsing - k - 1; wa1[j] = wa1[j]/r[j+ldr*j]; temp = wa1[j]; jm1 = j - 1; if (jm1 >= 0) { ij = ldr * j; for (i=0; i<=jm1; i++) { wa1[i] -= r[ij]*temp; ij += 1; } } } } for (j=0; j<n; j++) { l = ipvt[j]; x[l] = wa1[j]; } /* initialize the iteration counter. * evaluate the function at the origin, and test * for acceptance of the gauss-newton direction. */ iter = 0; for (j=0; j<n; j++) wa2[j] = diag[j]*x[j]; dxnorm = enorm(n,wa2); fp = dxnorm - delta; if (fp <= p1*delta) { #ifdef BUG printf( "going to L220\n" ); #endif goto L220; } /* if the jacobian is not rank deficient, the newton * step provides a lower bound, parl, for the zero of * the function. otherwise set this bound to zero. */ parl = zero; if (nsing >= n) { for (j=0; j<n; j++) { l = ipvt[j]; wa1[j] = diag[l]*(wa2[l]/dxnorm); } jj = 0; for (j=0; j<n; j++) { sum = zero; jm1 = j - 1; if (jm1 >= 0) { ij = jj; for (i=0; i<=jm1; i++) { sum += r[ij]*wa1[i]; ij += 1; } } wa1[j] = (wa1[j] - sum)/r[j+ldr*j]; jj += ldr; /* [i+ldr*j] */ } temp = enorm(n,wa1); parl = ((fp/delta)/temp)/temp; } /* calculate an upper bound, paru, for the zero of the function. */ jj = 0; for (j=0; j<n; j++) { sum = zero; ij = jj; for (i=0; i<=j; i++) { sum += r[ij]*qtb[i]; ij += 1; } l = ipvt[j]; wa1[j] = sum/diag[l]; jj += ldr; /* [i+ldr*j] */ } gnorm = enorm(n,wa1); paru = gnorm/delta; if(paru == zero) paru = DWARF/dmin1(delta,p1); /* if the input par lies outside of the interval (parl,paru), * set par to the closer endpoint. */ *par = dmax1(*par,parl); *par = dmin1(*par,paru); if (*par == zero) *par = gnorm/dxnorm; #ifdef BUG printf( "parl %.4e par %.4e paru %.4e\n", parl, *par, paru ); #endif /* beginning of an iteration. 
*/ L150: iter += 1; /* evaluate the function at the current value of par. */ if (*par == zero) *par = dmax1(DWARF,p001*paru); temp = sqrt(*par); for (j=0; j<n; j++) wa1[j] = temp*diag[j]; qrsolv(n,r,ldr,ipvt,wa1,qtb,x,sdiag,wa2); for (j=0; j<n; j++) wa2[j] = diag[j]*x[j]; dxnorm = enorm(n,wa2); temp = fp; fp = dxnorm - delta; /* if the function is small enough, accept the current value * of par. also test for the exceptional cases where parl * is zero or the number of iterations has reached 10. */ if ((fabs(fp) <= p1*delta) || ((parl == zero) && (fp <= temp) && (temp < zero)) || (iter == 10)) { goto L220; } /* compute the newton correction. */ for (j=0; j<n; j++) { l = ipvt[j]; wa1[j] = diag[l]*(wa2[l]/dxnorm); } jj = 0; for (j=0; j<n; j++) { wa1[j] = wa1[j]/sdiag[j]; temp = wa1[j]; jp1 = j + 1; if (jp1 < n) { ij = jp1 + jj; for (i=jp1; i<n; i++) { wa1[i] -= r[ij]*temp; ij += 1; /* [i+ldr*j] */ } } jj += ldr; /* ldr*j */ } temp = enorm(n,wa1); parc = ((fp/delta)/temp)/temp; /* depending on the sign of the function, update parl or paru. */ if (fp > zero) parl = dmax1(parl, *par); if (fp < zero) paru = dmin1(paru, *par); /* compute an improved estimate for par. */ *par = dmax1(parl, *par + parc); /* end of an iteration. */ goto L150; L220: /* termination. */ if (iter == 0) *par = zero; }
/*
 * lmpar -- determine the Levenberg-Marquardt parameter (MINPACK).
 *
 * Given an m by n matrix a, an n by n nonsingular diagonal matrix d,
 * an m-vector b, and a positive number delta, the problem is to
 * determine a value for the parameter par such that if x solves
 *
 *        a*x = b ,     sqrt(par)*d*x = 0 ,
 *
 * in the least squares sense, and dxnorm is the euclidean norm of d*x,
 * then either par is zero and (dxnorm-delta) <= 0.1*delta, or par is
 * positive and abs(dxnorm-delta) <= 0.1*delta.
 *
 * The caller supplies the qr factorization, with column pivoting, of
 * a: if a*p = q*r, lmpar expects the full upper triangle of r, the
 * permutation p, and the first n components of (q transpose)*b.  On
 * output an upper triangular matrix s with
 * p^t*(a^t*a + par*d*d)*p = s^t*s is also available.
 *
 * Only a few iterations are generally needed; if the limit of 10
 * iterations is reached, par contains the best value obtained so far.
 *
 *   n      order of r.
 *   r__    n by n array with leading dimension ldr; element (i,j)
 *          (0-based) is r__[i + j*ldr].  On input the upper triangle
 *          holds r; on output it is unaltered and the strict lower
 *          triangle holds the strict upper triangle (transposed) of s.
 *   ldr    leading dimension of r__, not less than n.
 *   ipvt   length n, defines p.  The entries are 1-based column
 *          numbers (Fortran convention), as in the original code.
 *   diag   length n, diagonal elements of d.
 *   qtb    length n, first n elements of (q transpose)*b.
 *   delta  positive upper bound on the euclidean norm of d*x.
 *   par    in: initial estimate; out: final estimate.
 *   x      out, length n: least squares solution for the output par.
 *   sdiag  out, length n: diagonal elements of s.
 *   wa1, wa2  work arrays of length n.
 *
 * Differences from the machine-translated original: the arrays are
 * indexed from 0 directly, so no pointer is moved in front of its
 * array; the termination test uses fabs() because abs() is the
 * integer function in standard C; the tolerances are local constants
 * and no longer macros that stay defined after the function.
 *
 * argonne national laboratory. minpack project. march 1980.
 * burton s. garbow, kenneth e. hillstrom, jorge j. more
 */
void lmpar(int n, double *r__, int ldr, const int *ipvt, const double *diag,
           const double *qtb, double delta, double *par, double *x,
           double *sdiag, double *wa1, double *wa2)
{
    const double accept_tol = .1;    /* relative tolerance on dxnorm - delta */
    const double restart_tol = .001; /* fraction of paru used to restart a zero par */

    int i, j, l;
    int iter;
    int nsing;
    double dwarf;
    double dxnorm, gnorm;
    double fp, parc, parl, paru;
    double sum, temp;

    /* dwarf is the smallest positive magnitude. */
    dwarf = dpmpar(2);

    /* compute and store in x the gauss-newton direction. if the */
    /* jacobian is rank-deficient, obtain a least squares solution. */
    nsing = n;
    for (j = 0; j < n; ++j) {
        wa1[j] = qtb[j];
        if (r__[j + j * ldr] == 0. && nsing == n) {
            nsing = j;
        }
        if (nsing < n) {
            wa1[j] = 0.;
        }
    }
    for (j = nsing - 1; j >= 0; --j) {
        wa1[j] /= r__[j + j * ldr];
        temp = wa1[j];
        for (i = 0; i < j; ++i) {
            wa1[i] -= r__[i + j * ldr] * temp;
        }
    }
    for (j = 0; j < n; ++j) {
        l = ipvt[j] - 1; /* ipvt is 1-based */
        x[l] = wa1[j];
    }

    /* initialize the iteration counter. */
    /* evaluate the function at the origin, and test */
    /* for acceptance of the gauss-newton direction. */
    iter = 0;
    for (j = 0; j < n; ++j) {
        wa2[j] = diag[j] * x[j];
    }
    dxnorm = enorm(n, wa2);
    fp = dxnorm - delta;
    if (fp <= accept_tol * delta) {
        /* accepted without iterating: par is reported as zero. */
        *par = 0.;
        return;
    }

    /* if the jacobian is not rank deficient, the newton */
    /* step provides a lower bound, parl, for the zero of */
    /* the function. otherwise set this bound to zero. */
    parl = 0.;
    if (nsing >= n) {
        for (j = 0; j < n; ++j) {
            l = ipvt[j] - 1;
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
        for (j = 0; j < n; ++j) {
            sum = 0.;
            for (i = 0; i < j; ++i) {
                sum += r__[i + j * ldr] * wa1[i];
            }
            wa1[j] = (wa1[j] - sum) / r__[j + j * ldr];
        }
        temp = enorm(n, wa1);
        parl = fp / delta / temp / temp;
    }

    /* calculate an upper bound, paru, for the zero of the function. */
    for (j = 0; j < n; ++j) {
        sum = 0.;
        for (i = 0; i <= j; ++i) {
            sum += r__[i + j * ldr] * qtb[i];
        }
        l = ipvt[j] - 1;
        wa1[j] = sum / diag[l];
    }
    gnorm = enorm(n, wa1);
    paru = gnorm / delta;
    if (paru == 0.) {
        paru = dwarf / min(delta,accept_tol);
    }

    /* if the input par lies outside of the interval (parl,paru), */
    /* set par to the closer endpoint. */
    *par = max(*par,parl);
    *par = min(*par,paru);
    if (*par == 0.) {
        *par = gnorm / dxnorm;
    }

    for (;;) {
        ++iter;

        /* evaluate the function at the current value of par. */
        if (*par == 0.) {
            temp = restart_tol * paru;
            *par = max(dwarf,temp);
        }
        temp = sqrt(*par);
        for (j = 0; j < n; ++j) {
            wa1[j] = temp * diag[j];
        }
        qrsolv(n, r__, ldr, ipvt, wa1, qtb, x, sdiag, wa2);
        for (j = 0; j < n; ++j) {
            wa2[j] = diag[j] * x[j];
        }
        dxnorm = enorm(n, wa2);
        temp = fp;
        fp = dxnorm - delta;

        /* if the function is small enough, accept the current value */
        /* of par. also test for the exceptional cases where parl */
        /* is zero or the number of iterations has reached 10. */
        if (fabs(fp) <= accept_tol * delta
            || (parl == 0. && fp <= temp && temp < 0.)
            || iter == 10) {
            return;
        }

        /* compute the newton correction. */
        for (j = 0; j < n; ++j) {
            l = ipvt[j] - 1;
            wa1[j] = diag[l] * (wa2[l] / dxnorm);
        }
        for (j = 0; j < n; ++j) {
            wa1[j] /= sdiag[j];
            temp = wa1[j];
            for (i = j + 1; i < n; ++i) {
                wa1[i] -= r__[i + j * ldr] * temp;
            }
        }
        temp = enorm(n, wa1);
        parc = fp / delta / temp / temp;

        /* depending on the sign of the function, update parl or paru. */
        if (fp > 0.) {
            parl = max(parl,*par);
        }
        if (fp < 0.) {
            paru = min(paru,*par);
        }

        /* compute an improved estimate for par. */
        temp = *par + parc;
        *par = max(parl,temp);
    }
} /* lmpar_ */
/*
 * lmpar -- determine the Levenberg-Marquardt parameter.
 *
 * This routine is a C translation of Fortran code by
 * argonne national laboratory. minpack project. march 1980.
 * burton s. garbow, kenneth e. hillstrom, jorge j. more
 *
 *   r      N by N array.  On input the full upper triangle must contain
 *          the full upper triangle of the matrix r.  On output the full
 *          upper triangle is unaltered, and the strict lower triangle
 *          contains the strict upper triangle (transposed) of the upper
 *          triangular matrix s.  This routine addresses element (i,j)
 *          as r[i*N + j].
 *   ldr    leading dimension of r, not less than N.  It is only
 *          forwarded to qrsolv() here; the indexing in this routine
 *          uses N.  NOTE(review): confirm callers always pass ldr == N.
 *   N      order of r (positive).
 *   ipvt   length N, defines the permutation matrix p such that
 *          a*p = q*r; column j of p is column ipvt[j] of the identity.
 *   diag   length N, diagonal elements of the matrix d.
 *   qtb    length N, first N elements of (q transpose)*b.
 *   delta  positive upper bound on the euclidean norm of d*x.
 *   par    in: initial estimate of the levenberg-marquardt parameter
 *          (nonnegative); out: final estimate.
 *   x      out, length N: least squares solution of a*x = b,
 *          sqrt(par)*d*x = 0, for the output par.
 *   sdiag  out, length N: diagonal elements of the upper triangular
 *          matrix s.
 *
 * Two work vectors of length N are allocated internally and released
 * before returning.  If either allocation fails for N > 0 the routine
 * returns immediately without touching par, x or sdiag, where the
 * earlier version dereferenced the null pointer.
 */
void lmpar(double *r,int ldr,int N,int *ipvt,double *diag,double *qtb,double delta,double *par,double *x,double *sdiag) {
	const double accept_tol = 1.0e-01;   /* relative tolerance on dxnorm - delta */
	const double restart_tol = 1.0e-03;  /* fraction of paru used when par is zero */
	const double zero = 0.0;
	const double dwarf = 2.22507385852e-308;
	int i,iter,j,l,nsing;
	double dxnorm,fp,gnorm,parc,parl,paru,sum,temp;
	double *wa1,*wa2;

	wa1 = malloc((size_t) N * sizeof *wa1);
	wa2 = malloc((size_t) N * sizeof *wa2);
	if (N > 0 && (wa1 == NULL || wa2 == NULL)) {
		/* out of memory: nothing can be computed */
		free(wa1);
		free(wa2);
		return;
	}

	// compute and store in x the gauss-newton direction. if the
	// jacobian is rank-deficient, obtain a least squares solution.
	nsing = N;
	for (j = 0; j < N; ++j) {
		wa1[j] = qtb[j];
		if (r[j*N+j] == zero && nsing == N) {
			nsing = j;
		}
		if (nsing < N) {
			wa1[j] = zero;
		}
	}

	// back substitution over the leading nsing columns, last first
	for (j = nsing - 1; j >= 0; --j) {
		wa1[j] = wa1[j]/r[j*N+j];
		temp = wa1[j];
		for (i = 0; i < j; ++i) {
			wa1[i] = wa1[i] - r[i*N+j]*temp;
		}
	}

	for (j = 0; j < N; ++j) {
		l = ipvt[j];
		x[l] = wa1[j];
	}

	// initialize the iteration counter.
	// evaluate the function at the origin, and test
	// for acceptance of the gauss-newton direction.
	iter = 0;
	for (j = 0; j < N; ++j) {
		wa2[j] = diag[j]*x[j];
	}
	dxnorm = enorm(wa2,N);
	fp = dxnorm - delta;

	if (fp > accept_tol*delta) {
		// if the jacobian is not rank deficient, the newton
		// step provides a lower bound, parl, for the zero of
		// the function. otherwise set this bound to zero.
		parl = zero;
		if (nsing >= N) {
			for (j = 0; j < N; ++j) {
				l = ipvt[j];
				wa1[j] = diag[l]*(wa2[l]/dxnorm);
			}
			for (j = 0; j < N; ++j) {
				sum = zero;
				for (i = 0; i < j; ++i) {
					sum = sum + r[i*N+j]*wa1[i];
				}
				wa1[j] = (wa1[j] - sum)/r[j*N+j];
			}
			temp = enorm(wa1,N);
			parl = ((fp/delta)/temp)/temp;
		}

		// calculate an upper bound, paru, for the zero of the function.
		for (j = 0; j < N; ++j) {
			sum = zero;
			for (i = 0; i <= j; ++i) {
				sum = sum + r[i*N+j]*qtb[i];
			}
			l = ipvt[j];
			wa1[j] = sum/diag[l];
		}
		gnorm = enorm(wa1,N);
		paru = gnorm/delta;
		if (paru == zero) {
			paru = dwarf/pmin(delta,accept_tol);
		}

		// if the input par lies outside of the interval (parl,paru),
		// set par to the closer endpoint.
		*par = pmax(*par,parl);
		*par = pmin(*par,paru);
		if (*par == zero) {
			*par = gnorm/dxnorm;
		}

		for (;;) {
			iter++;

			// evaluate the function at the current value of par.
			if (*par == zero) {
				*par = pmax(dwarf,restart_tol*paru);
			}
			temp = sqrt(*par);
			for (j = 0; j < N; ++j) {
				wa1[j] = temp*diag[j];
			}
			qrsolv(r,ldr,N,ipvt,wa1,qtb,x,sdiag);
			for (j = 0; j < N; ++j) {
				wa2[j] = diag[j]*x[j];
			}
			dxnorm = enorm(wa2,N);
			temp = fp;
			fp = dxnorm - delta;

			// if the function is small enough, accept the current value
			// of par. also test for the exceptional cases where parl
			// is zero or the number of iterations has reached 10.
			if (fabs(fp) <= accept_tol*delta
			    || iter == 10
			    || (parl == zero && fp <= temp && temp < zero)) {
				break;
			}

			// compute the newton correction.
			for (j = 0; j < N; ++j) {
				l = ipvt[j];
				wa1[j] = diag[l]*(wa2[l]/dxnorm);
			}
			for (j = 0; j < N; ++j) {
				wa1[j] = wa1[j]/sdiag[j];
				temp = wa1[j];
				for (i = j + 1; i < N; ++i) {
					wa1[i] = wa1[i] - r[i*N+j]*temp;
				}
			}
			temp = enorm(wa1,N);
			parc = ((fp/delta)/temp)/temp;

			// depending on the sign of the function, update parl or paru.
			if (fp > zero) {
				parl = pmax(parl,*par);
			}
			if (fp < zero) {
				paru = pmin(paru,*par);
			}

			// compute an improved estimate for par.
			*par = pmax(parl,*par+parc);
		}
	}

	// the gauss-newton direction was accepted without iterating
	if (iter == 0) {
		*par = zero;
	}

	free(wa1);
	free(wa2);
}
/* PROCEDURE : lmpar * * ENTREE : * n Ordre de la matrice "r". * r Matrice de taille "n" x "n". En entree, la toute la partie * triangulaire superieure doit contenir toute la partie triangulaire * superieure de "r". * * ldr Taille maximum de la matrice "r". "ldr" >= "n". * * ipvt Vecteur de taille "n" qui definit la matrice de permutation "p" * tel que : * a * p = q * r. * La jeme colonne de p la colonne ipvt[j] de la matrice d'identite. * * diag Vecteur de taille "n" contenant les elements diagonaux de la * matrice "d". * * qtb Vecteur de taille "n" contenant les "n" premiers elements du * vecteur (q transpose)*b. * * delta Limite superieure de la norme euclidienne de d * x. * * par Estimee initiale du parametre de Levenberg-Marquardt. * wa1, wa2 Vecteurs de taille "n" de travail. * * DESCRIPTION : * La procedure determine le parametre de Levenberg-Marquardt. Soit une matrice * "a" de taille "m" x "n", une matrice diagonale "d" non singuliere de taille * "n" x "n", un vecteur "b" de taille "m" et un nombre positf delta, le probleme * est le calcul du parametre "par" de telle facon que si "x" resoud le systeme * * a * x = b , sqrt(par) * d * x = 0 , * * au sens des moindre carre, et dxnorm est la norme euclidienne de d * x * alors "par" vaut 0.0 et (dxnorm - delta) <= 0.1 * delta , * ou "par" est positif et abs(dxnorm-delta) <= 0.1 * delta. * Cette procedure complete la solution du probleme si on lui fourni les infos * nessaires de la factorisation qr, avec pivotage de colonnes de a. * Donc, si a * p = q * r, ou "p" est une matrice de permutation, les colonnes * de "q" sont orthogonales, et "r" est une matrice triangulaire superieure * avec les elements diagonaux classes par ordre decroissant de leur valeur, lmpar * attend une matrice triangulaire superieure complete, la matrice de permutation * "p" et les "n" premiers elements de (q transpose) * b. 
En sortie, la procedure * lmpar fournit aussi une matrice triangulaire superieure "s" telle que * * t t t * p * (a * a + par * d * d )* p = s * s . * * "s" est utilise a l'interieure de lmpar et peut etre d'un interet separe. * * Seulement quelques iterations sont necessaire pour la convergence de * l'algorithme. Si neanmoins la limite de 10 iterations est atteinte, la * valeur de sortie "par" aura la derniere meilleure valeur. * * SORTIE : * r En sortie, tout le triangle superieur est inchange, et le * le triangle inferieur contient les elements de la partie * triangulaire superieure (transpose) de la matrice triangulaire * superieure de "s". * par Estimee finale du parametre de Levenberg-Marquardt. * x Vecteur de taille "n" contenant la solution au sens des moindres * carres du systeme a * x = b, sqrt(par) * d * x = 0, pour le * parametre en sortie "par" * sdiag Vecteur de taille "n" contenant les elements diagonaux de la * matrice triangulaire "s". * * RETOUR : * En cas de succes, la valeur 0.0 est retournee. * */ int lmpar(int n, double *r, int ldr, int *ipvt, double *diag, double *qtb, double *delta, double *par, double *x, double *sdiag, double *wa1, double *wa2) { const double tol1 = 0.1, tol001 = 0.001; /* tolerance a 0.1 et a 0.001 */ long i, j, jm1, jp1, k, l; /* compteur de boucle */ int iter; /* compteur d'iteration */ int nsing; /* nombre de singularite de la matrice */ double dxnorm, fp, gnorm, parc; double parl, paru; /* borne inf et sup de par */ double sum, temp; double dwarf = DBL_MIN; /* plus petite amplitude positive */ /* * calcul et stockage dans "x" de la direction de Gauss-Newton. Si * le jacobien a une rangee de moins, on a une solution au moindre * carres. 
*/ nsing = n; for (i = 0; i < (long) n; i++) { wa1[i] = qtb[i]; if (*MIJ(r, i, i, ldr) == 0.0 && nsing == n) nsing = (int) i - 1; if (nsing < n) wa1[i] = 0.0; } if ((int) nsing >= 0) { for (k = 0; k < (long) nsing; k++) { i = nsing - 1 - k; wa1[i] /= *MIJ(r, i, i, ldr); temp = wa1[i]; jm1 = i - 1; if (jm1 >= 0) { for (j = 0; j <= jm1; j++) wa1[j] -= *MIJ(r, i, j, ldr) * temp; } } } for (j = 0; j < (long) n; j++) { l = ipvt[j]; x[l] = wa1[j]; } /* * initialisation du compteur d'iteration. * evaluation de la fonction a l'origine, et test * d'acceptation de la direction de Gauss-Newton. */ iter = 0; for (i = 0; i < (long) n; i++) wa2[i] = diag[i] * x[i]; dxnorm = enorm(wa2, n); fp = dxnorm - *delta; if (fp > tol1 * (*delta)) { /* * Si le jacobien n'a pas de rangee deficiente,l'etape de * Newton fournit une limite inferieure, parl pour le * zero de la fonction. Sinon cette limite vaut 0.0. */ parl = 0.0; if (nsing >= n) { for (i = 0; i < (long) n; i++) { l = ipvt[i]; wa1[i] = diag[l] * (wa2[l] / dxnorm); } for (i = 0; i < (long) n; i++) { long im1; sum = 0.0; im1 = (i - 1L); if (im1 >= 0) { for (j = 0; j <= im1; j++) sum += (*MIJ(r, i, j, ldr) * wa1[j]); } wa1[i] = (wa1[i] - sum) / *MIJ(r, i, i, ldr); } temp = enorm(wa1, n); parl = ((fp / *delta) / temp) / temp; } /* * calcul d'une limite superieure, paru, pour le zero de la * fonction. */ for (i = 0; i < (long) n; i++) { sum = 0.0; for (j = 0; j <= i; j++) sum += *MIJ(r, i, j, ldr) * qtb[j]; l = ipvt[i]; wa1[i] = sum / diag[l]; } gnorm = enorm(wa1, n); paru = gnorm / *delta; if (paru == 0.0) paru = dwarf / vpMath::minimum(*delta, tol1); /* * Si "par" en entree tombe hors de l'intervalle (parl,paru), * on le prend proche du point final. */ *par = vpMath::maximum(*par, parl); *par = vpMath::minimum(*par, paru); if (*par == 0.0) *par = gnorm / dxnorm; /* * debut d'une iteration. */ while (iter >= 0) { iter++; /* * evaluation de la fonction a la valeur courant * de "par". 
*/ if (*par == 0.0) *par = vpMath::maximum(dwarf, (tol001 * paru)); temp = sqrt(*par); for (i = 0; i < (long) n; i++) wa1[i] = temp * diag[i]; qrsolv(n, r, ldr, ipvt, wa1, qtb, x, sdiag, wa2); for (i = 0; i < (long) n; i++) wa2[i] = diag[i] * x[i]; dxnorm = enorm(wa2, n); temp = fp; fp = dxnorm - *delta; /* * si la fonction est assez petite, acceptation de * la valeur courant de "par". de plus, test des cas * ou parl est nul et ou le nombre d'iteration a * atteint 10. */ if ((fabs(fp) <= tol1 * *delta) || ((parl == 0.0) && (fp <= temp) && (temp < 0.0)) || (iter == 10)) { /* * terminaison. */ if (iter == 0) *par = 0.0; return (0); } /* * calcul de la correction de Newton. */ for (i = 0; i < (long) n; i++) { l = ipvt[i]; wa1[i] = diag[l] * (wa2[l] / dxnorm); } for (i = 0; i < (long) n; i++) { wa1[i] = wa1[i] / sdiag[i]; temp = wa1[i]; jp1 = i + 1; if ( (long) n >= jp1) { for (j = jp1; j < (long) n; j++) wa1[j] -= (*MIJ(r, i, j, ldr) * temp); } } temp = enorm(wa1, n); parc = ((fp / *delta) / temp) / temp; /* * selon le signe de la fonction, mise a jour * de parl ou paru. */ if (fp > 0.0) parl = vpMath::maximum(parl, *par); if (fp < 0.0) paru = vpMath::minimum(paru, *par); /* * calcul d'une estimee ameliree de "par". */ *par = vpMath::maximum(parl, (*par + parc)); }/* fin boucle sur iter */ }/* fin fp > tol1 * delta */ /* * terminaison. */ if (iter == 0) *par = 0.0; return (0); }
/*
 * symmetry_correction -- make the discrete solution agree on pairs of
 * mirrored elements.
 *
 * For every element E in 1..num_elts the partner Es is obtained from
 * find_symmetric_element().  At each of the ngpts quadrature points of E
 * the solution on E is compared with the solution on Es at the reflected
 * physical point (1 - y, 1 - x):
 *
 *     component 0 of Es   against    component 0 of E
 *     component 1 of Es   against   -component 2 of E
 *     component 2 of Es   against   -component 1 of E
 *
 * If the largest difference of any component exceeds EPSILON_S, the
 * reflected solution of E is integrated against the basis functions of Es
 * (quadrature over the points of Es), the local matrix of Es is
 * QR-factorised and, for each component whose own difference exceeds
 * EPSILON_S, the resulting coefficients overwrite the coefficients of Es
 * in uh_tn_sol_vec.
 *
 *   mesh_element   element array, indexed from 1.
 *   mesh_node      node array, forwarded to the helpers.
 *   uh_tn_sol_vec  solution coefficients, read and updated in place.
 *   num_elts       number of elements.
 *   current_time   not used by this routine.
 *   user_param     forwarded to Alocal_mat().
 *
 * All scratch buffers are released before returning.  If an allocation
 * fails the routine returns without applying any correction.
 *
 * NOTE(review): the differences are compared with their sign, so a
 * negative deviation never raises the running maximum.  That looks
 * unintended (fabs() was probably meant) but is kept as it was --
 * confirm before changing, since it alters which elements get corrected.
 */
void symmetry_correction(element* mesh_element,
                         node* mesh_node,
                         double* uh_tn_sol_vec,
                         int num_elts,
                         double current_time,
                         parameters* user_param
                         )
{
    int E=0;
    int Es=0;
    int idofs=0;
    int k=0;
    int i=0;
    double approx_solE[SYSTEM_DIM];
    double approx_solEs[SYSTEM_DIM];
    double phys_coordsE[2];
    double phys_coordsEs[2];
    double ref_coordsEs[2];
    double ref_coordsE[2];
    double error0=0.0;
    double error1=0.0;
    double error2=0.0;
    double max_error0 =0.0;
    double max_error1 =0.0;
    double max_error2 =0.0;
    double* Aloc_matrix = NULL;
    double* usolE_rhs = NULL;
    double* usolE_rhs1 = NULL;
    double* usolE_rhs2 = NULL;
    double** A_loc_QR = NULL;
    double* Q_coeff = NULL;
    double* R_coeff = NULL;
    int sing_decomp;

    (void)current_time;

    Aloc_matrix = (double*)malloc((NLOC*NLOC)*sizeof(double));
    Q_coeff = (double*)malloc((NLOC+1)*sizeof(double));
    R_coeff = (double*)malloc((NLOC+1)*sizeof(double));
    A_loc_QR = (double**)malloc((NLOC+1)*sizeof(double*));
    usolE_rhs = (double*)malloc(NLOC*sizeof(double));
    usolE_rhs1 = (double*)malloc(NLOC*sizeof(double));
    usolE_rhs2 = (double*)malloc(NLOC*sizeof(double));

    /* Clear the row table first so the cleanup code can free it safely
     * even if a row allocation fails part-way through. */
    if(A_loc_QR != NULL)
    {
        for(i=0;i<NLOC+1;i++)
            A_loc_QR[i] = NULL;
    }

    if((Aloc_matrix == NULL) || (Q_coeff == NULL) || (R_coeff == NULL) || (A_loc_QR == NULL) ||
       (usolE_rhs == NULL) || (usolE_rhs1 == NULL) || (usolE_rhs2 == NULL))
    {
        goto cleanup;
    }

    for(i=0;i<NLOC+1;i++)
    {
        A_loc_QR[i] = (double*)malloc((NLOC+1)*sizeof(double));
        if(A_loc_QR[i] == NULL)
            goto cleanup;
    }

    for(E=1;E<num_elts+1;E++)
    {
        Es = find_symmetric_element(mesh_element,num_elts,mesh_node,E);
        max_error0 =0.0;
        max_error1 =0.0;
        max_error2 =0.0;

        /* Largest (signed) mismatch between E and its mirror image Es. */
        for(k=1;k<ngpts+1;k++)
        {
            get_approx_solution(uh_tn_sol_vec,E,mesh_element,mesh_node,mesh_element[E].el_gpts_ref_x[k],mesh_element[E].el_gpts_ref_y[k],approx_solE);

            phys_coordsEs[0] = 1.0 - mesh_element[E].el_gpts_y[k];
            phys_coordsEs[1] = 1.0 - mesh_element[E].el_gpts_x[k];
            map_to_reference_element(mesh_element,mesh_node,Es,ref_coordsEs,phys_coordsEs[0],phys_coordsEs[1]);
            get_approx_solution(uh_tn_sol_vec,Es,mesh_element,mesh_node,ref_coordsEs[0],ref_coordsEs[1],approx_solEs);

            error0 = approx_solEs[0] - approx_solE[0];
            error1 = approx_solEs[1] + approx_solE[2];
            error2 = approx_solEs[2] + approx_solE[1];

            if(error0 > max_error0)
            {
                max_error0 = error0;
            }
            if(error1 > max_error1)
            {
                max_error1 = error1;
            }
            if(error2 > max_error2)
            {
                max_error2 = error2;
            }
        }

        if((max_error0 > EPSILON_S) ||(max_error1 > EPSILON_S) || (max_error2 > EPSILON_S))
        {
            init_zero_d(Aloc_matrix,NLOC*NLOC);
            init_zero_d(usolE_rhs,NLOC);
            init_zero_d(usolE_rhs1,NLOC);
            init_zero_d(usolE_rhs2,NLOC);

            Alocal_mat(Es,mesh_element,mesh_node,user_param,Aloc_matrix);
            assemble_qr(Aloc_matrix,NLOC,Q_coeff,R_coeff,A_loc_QR,&sing_decomp);

            /* Right-hand sides: the reflected solution of E integrated
             * against the basis functions of Es. */
            for(k=1;k<ngpts+1;k++)
            {
                phys_coordsE[0] = 1.0 - mesh_element[Es].el_gpts_y[k];
                phys_coordsE[1] = 1.0 - mesh_element[Es].el_gpts_x[k];
                map_to_reference_element(mesh_element,mesh_node,E,ref_coordsE,phys_coordsE[0],phys_coordsE[1]);
                get_approx_solution(uh_tn_sol_vec,E,mesh_element,mesh_node,ref_coordsE[0],ref_coordsE[1],approx_solE);

                for(idofs=0;idofs<NLOC;idofs++)
                {
                    usolE_rhs[idofs] += approx_solE[0]*mesh_element[Es].el_gpts_basis[(k-1)*NLOC+idofs]*mesh_element[Es].det*mesh_element[Es].el_gpts_w[k];
                    usolE_rhs1[idofs] += (-1.0*approx_solE[2]*mesh_element[Es].el_gpts_basis[(k-1)*NLOC+idofs]*mesh_element[Es].det*mesh_element[Es].el_gpts_w[k]);
                    usolE_rhs2[idofs] += (-1.0*approx_solE[1]*mesh_element[Es].el_gpts_basis[(k-1)*NLOC+idofs]*mesh_element[Es].det*mesh_element[Es].el_gpts_w[k]);
                }
            }

            if(max_error0 > EPSILON_S)
            {
                qrsolv(A_loc_QR,NLOC,Q_coeff,R_coeff,usolE_rhs);
                for(idofs = 0;idofs < NLOC;idofs++)
                {
                    uh_tn_sol_vec[SYSTEM_DIM*(Es-1)*NLOC+(0*NLOC+idofs)] = usolE_rhs[idofs];
                }
            }

            /* The factorisation is rebuilt from scratch before each of
             * the remaining solves, exactly as before. */
            init_zero_d(Aloc_matrix,NLOC*NLOC);
            init_zero_m(A_loc_QR,NLOC+1,NLOC+1);
            Alocal_mat(Es,mesh_element,mesh_node,user_param,Aloc_matrix);
            assemble_qr(Aloc_matrix,NLOC,Q_coeff,R_coeff,A_loc_QR,&sing_decomp);

            if(max_error1 > EPSILON_S)
            {
                qrsolv(A_loc_QR,NLOC,Q_coeff,R_coeff,usolE_rhs1);
                for(idofs = 0;idofs < NLOC;idofs++)
                {
                    uh_tn_sol_vec[SYSTEM_DIM*(Es-1)*NLOC+(1*NLOC+idofs)] =usolE_rhs1[idofs];
                }
            }

            init_zero_d(Aloc_matrix,NLOC*NLOC);
            init_zero_m(A_loc_QR,NLOC+1,NLOC+1);
            Alocal_mat(Es,mesh_element,mesh_node,user_param,Aloc_matrix);
            assemble_qr(Aloc_matrix,NLOC,Q_coeff,R_coeff,A_loc_QR,&sing_decomp);

            if(max_error2 > EPSILON_S)
            {
                qrsolv(A_loc_QR,NLOC,Q_coeff,R_coeff,usolE_rhs2);
                for(idofs = 0;idofs < NLOC;idofs++)
                {
                    uh_tn_sol_vec[SYSTEM_DIM*(Es-1)*NLOC+(2*NLOC+idofs)]=usolE_rhs2[idofs];
                }
            }
        }
    }

cleanup:
    if(A_loc_QR != NULL)
    {
        for(i=0;i<NLOC+1;i++)
            free(A_loc_QR[i]);
        free(A_loc_QR);
    }
    free(usolE_rhs2);
    free(usolE_rhs1);
    free(usolE_rhs);
    free(R_coeff);
    free(Q_coeff);
    free(Aloc_matrix);
}
double lmpar(int n, double *r__, int *ldr, int *ipvt, double *diag, double *qtb, double *delta, double par_init, double *x, double *sdiag, double *wa1, double *wa2) { double par = par_init; // ---- the return_value /* subroutine lmpar given an m by n matrix a, an n by n nonsingular diagonal matrix d, an m-vector b, and a positive number delta, the problem is to determine a value for the parameter par such that if x solves the system a*x = b , sqrt(par)*d*x = 0 , in the least squares sense, and dxnorm is the euclidean norm of d*x, then either par is 0. and (dxnorm-delta) <= 0.1*delta , or par is positive and abs(dxnorm-delta) <= 0.1*delta . this subroutine completes the solution of the problem if it is provided with the necessary information from the qr factorization, with column pivoting, of a. that is, if a*p = q*r, where p is a permutation matrix, q has orthogonal columns, and r is an upper triangular matrix with diagonal elements of nonincreasing magnitude, then lmpar expects the full upper triangle of r, the permutation matrix p, and the first n components of (q transpose)*b. on output lmpar also provides an upper triangular matrix s such that t t t p *(a *a + par*d*d)*p = s *s . s is employed within lmpar and may be of separate interest. only a few iterations are generally needed for convergence of the algorithm. if, however, the limit of 10 iterations is reached, then the output par will contain the best value obtained so far. the subroutine statement is subroutine lmpar(n,r,ldr,ipvt,diag,qtb,delta,par,x,sdiag, wa1,wa2) where n is a positive int input variable set to the order of r. r is an n by n array. on input the full upper triangle must contain the full upper triangle of the matrix r. on output the full upper triangle is unaltered, and the strict lower triangle contains the strict upper triangle (transposed) of the upper triangular matrix s. ldr is a positive int input variable not less than n which specifies the leading dimension of the array r. 
ipvt is an int input array of length n which defines the permutation matrix p such that a*p = q*r. column j of p is column ipvt(j) of the identity matrix. diag is an input array of length n which must contain the diagonal elements of the matrix d. qtb is an input array of length n which must contain the first n elements of the vector (q transpose)*b. delta is a positive input variable which specifies an upper bound on the euclidean norm of d*x. par is a nonnegative variable. on input par contains an initial estimate of the levenberg-marquardt parameter. on output par contains the final estimate. x is an output array of length n which contains the least squares solution of the system a*x = b, sqrt(par)*d*x = 0, for the output par. sdiag is an output array of length n which contains the diagonal elements of the upper triangular matrix s. wa1 and wa2 are work arrays of length n. subprograms called minpack-supplied ... dpmpar,enorm,qrsolv fortran-supplied ... fabs,dmax1,dmin1,dsqrt argonne national laboratory. minpack project. march 1980. burton s. garbow, kenneth e. hillstrom, jorge j. more ***********/ /* Initialized data */ static double p1 = .1; static double p001 = .001; /* System generated locals */ int r_dim1, r_offset; /* Local variables */ int i__, j, k, l, jp1, iter, nsing; double fp, sum, parc, parl, temp, paru, dwarf, gnorm, dxnorm; /* Parameter adjustments */ --wa2; --wa1; --sdiag; --x; --qtb; --diag; --ipvt; r_dim1 = *ldr; r_offset = 1 + r_dim1; r__ -= r_offset; // dwarf is the smallest positive magnitude : dwarf = machfd_.fltmin; /* compute and store in x the gauss-newton direction. if the jacobian is rank-deficient, obtain a least squares solution. */ nsing = n; for (j = 1; j <= n; ++j) { wa1[j] = qtb[j]; if (r__[j + j * r_dim1] == 0. 
&& nsing == n) { nsing = j - 1; } if (nsing < n) { wa1[j] = 0.; } } for (k = 1; k <= nsing; ++k) { j = nsing - k + 1; wa1[j] /= r__[j + j * r_dim1]; temp = wa1[j]; for (i__ = 1; i__ <= j-1; ++i__) { wa1[i__] -= r__[i__ + j * r_dim1] * temp; } } for (j = 1; j <= n; ++j) { l = ipvt[j]; x[l] = wa1[j]; } /* initialize the iteration counter. evaluate the function at the origin, and test for acceptance of the gauss-newton direction. */ iter = 0; for (j = 1; j <= n; ++j) { wa2[j] = diag[j] * x[j]; } dxnorm = enorm(n, &wa2[1]); fp = dxnorm - *delta; if (fp <= p1 * *delta) { goto L220; } /* if the jacobian is not rank deficient, the newton step provides a lower bound, parl, for the zero of the function. Otherwise set this bound to 0. */ parl = 0.; if (nsing >= n) { for (j = 1; j <= n; ++j) { l = ipvt[j]; wa1[j] = diag[l] * (wa2[l] / dxnorm); } for (j = 1; j <= n; ++j) { sum = 0.; for (i__ = 1; i__ <= j-1; ++i__) { sum += r__[i__ + j * r_dim1] * wa1[i__]; } wa1[j] = (wa1[j] - sum) / r__[j + j * r_dim1]; } temp = enorm(n, &wa1[1]); parl = fp / *delta / temp / temp; } // L120: /* calculate an upper bound, paru, for the 0. of the function. */ for (j = 1; j <= n; ++j) { sum = 0.; for (i__ = 1; i__ <= j; ++i__) { sum += r__[i__ + j * r_dim1] * qtb[i__]; } l = ipvt[j]; wa1[j] = sum / diag[l]; } gnorm = enorm(n, &wa1[1]); paru = gnorm / *delta; if (paru == 0.) { paru = dwarf / fmin2(*delta,p1); } /* if the input par lies outside of the interval (parl,paru), set par to the closer endpoint. */ par = fmax2(par, parl); par = fmin2(par, paru); if (par == 0.) { par = gnorm / dxnorm; } /* beginning of an iteration. */ L150: ++iter; /* evaluate the function at the current value of par. */ if (par == 0.) 
par = fmax2(dwarf, p001 * paru); temp = sqrt(par); for (j = 1; j <= n; ++j) { wa1[j] = temp * diag[j]; } qrsolv(n, &r__[r_offset], ldr, &ipvt[1], &wa1[1], &qtb[1], &x[1], &sdiag[1], &wa2[1]); for (j = 1; j <= n; ++j) { wa2[j] = diag[j] * x[j]; } dxnorm = enorm(n, &wa2[1]); temp = fp; fp = dxnorm - *delta; /* if the function is small enough, accept the current value of par. also test for the exceptional cases where parl is 0. or the number of iterations has reached 10. */ if (fabs(fp) <= p1 * *delta || (parl == 0. && fp <= temp && temp < 0.) || iter == 10) { // << FIXME: give warning for iter == 10 !! goto L220; } /* compute the newton correction. */ for (j = 1; j <= n; ++j) { l = ipvt[j]; wa1[j] = diag[l] * (wa2[l] / dxnorm); } for (j = 1; j <= n; ++j) { wa1[j] /= sdiag[j]; temp = wa1[j]; jp1 = j + 1; for (i__ = jp1; i__ <= n; ++i__) { wa1[i__] -= r__[i__ + j * r_dim1] * temp; } } temp = enorm(n, &wa1[1]); parc = fp / *delta / temp / temp; /* depending on the sign of the function, update parl or paru. */ if (fp > 0.) { parl = fmax2(parl,par); } if (fp < 0.) { paru = fmin2(paru,par); } // compute an improved estimate for par. par = fmax2(parl, par + parc); /* end of an iteration. */ goto L150; L220: // termination. if (iter == 0) { par = 0.; } return par; } /* lmpar */