/*
 * Box-constrained Levenberg-Marquardt minimization with an analytic Jacobian.
 *
 * Iteratively refines p so as to reduce ||x - func(p)||^2 while keeping every
 * p[i] inside [lb[i], ub[i]].  Each outer iteration forms the normal equations
 * (J^T J + mu*I) Dp = J^T e and then tries, in order:
 *   1. the projected LM step,
 *   2. a line search along the LM direction if that direction is a descent one,
 *   3. a projected gradient step.
 *
 * Returns the number of iterations performed, or LM_ERROR on invalid input,
 * on an allocation failure, or when the termination reason is 4 (singular
 * matrix) or 7 (non-finite "func" values).
 *
 * When dscl is supplied, p is divided by dscl and BOXSCALE() is applied to
 * lb/ub for the duration of the call; both are undone before a normal return.
 */
int LEVMAR_BC_DER(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */
  void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),  /* function to evaluate the Jacobian \part x / \part p */
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector. NULL implies a zero vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  LM_REAL *lb,        /* I: vector of lower bounds. If NULL, no lower bounds apply */
  LM_REAL *ub,        /* I: vector of upper bounds. If NULL, no upper bounds apply */
  LM_REAL *dscl,      /* I: diagonal scaling constants. NULL implies no scaling */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[4],    /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
                       * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used.
                       * Note that ||J^T e||_inf is computed on free (not equal to lb[i] or ub[i]) variables only.
                       */
  LM_REAL info[LM_INFO_SZ],
                      /* O: information regarding the minimization. Set to NULL if don't care
                       * info[0]= ||e||_2 at initial p.
                       * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                       * info[5]= # iterations,
                       * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
                       *                                 2 - stopped by small Dp
                       *                                 3 - stopped by itmax
                       *                                 4 - singular matrix. Restart from current p with increased mu
                       *                                 5 - no further error reduction is possible. Restart with increased mu
                       *                                 6 - stopped by small ||e||_2
                       *                                 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error
                       * info[7]= # function evaluations
                       * info[8]= # Jacobian evaluations
                       * info[9]= # linear systems solved, i.e. # attempts for reducing error
                       */
  LM_REAL *work,      /* working memory at least LM_BC_DER_WORKSZ() reals large, allocated if NULL */
  LM_REAL *covar,     /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)        /* pointer to possibly additional data, passed uninterpreted to func & jacf.
                       * Set to NULL if not needed
                       */
{
register int i, j, k, l;
int worksz, freework=0, issolved;
/* temp work arrays */
LM_REAL *e,          /* nx1 */
       *hx,         /* \hat{x}_i, nx1 */
       *jacTe,      /* J^T e_i mx1 */
       *jac,        /* nxm */
       *jacTjac,    /* mxm */
       *Dp,         /* mx1 */
       *diag_jacTjac,   /* diagonal of J^T J, mx1 */
       *pDp,        /* p + Dp, mx1 */
       *sp_pDp=NULL;    /* dscl*p or dscl*pDp, mx1 */

register LM_REAL mu,  /* damping constant */
                tmp; /* mainly used in matrix & vector multiplications */
LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
LM_REAL tau, eps1, eps2, eps2_sq, eps3;
LM_REAL init_p_eL2;
int nu=2, nu2, stop=0, nfev, njev=0, nlss=0;
const int nm=n*m;

/* variables for constrained LM */
struct FUNC_STATE fstate;
LM_REAL alpha=LM_CNST(1e-4), beta=LM_CNST(0.9), gamma=LM_CNST(0.99995), rho=LM_CNST(1e-8);
LM_REAL t, t0, jacTeDp;
LM_REAL tmin=LM_CNST(1e-12), tming=LM_CNST(1e-18); /* minimum step length for LS and PG steps */
const LM_REAL tini=LM_CNST(1.0); /* initial step length for LS and PG steps */
int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0;
int numactive;
int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL;

  mu=jacTe_inf=t=0.0; /* -Wall */

  /* ---- argument validation ---- */
  if(n<m){
    fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
    return LM_ERROR;
  }

  if(!jacf){
    fprintf(stderr, RCAT("No function specified for computing the Jacobian in ", LEVMAR_BC_DER)
        RCAT("().\nIf no such function is available, use ", LEVMAR_BC_DIF) RCAT("() rather than ", LEVMAR_BC_DER) "()\n");
    return LM_ERROR;
  }

  if(!LEVMAR_BOX_CHECK(lb, ub, m)){
    fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): at least one lower bound exceeds the upper one\n"));
    return LM_ERROR;
  }

  if(dscl){ /* check that scaling consts are valid */
    for(i=m; i-->0;  )
      if(dscl[i]<=0.0){
        fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): scaling constants should be positive (scale %d: %g <= 0)\n"), i, dscl[i]);
        return LM_ERROR;
      }

    sp_pDp=(LM_REAL *)malloc(m*sizeof(LM_REAL));
    if(!sp_pDp){
      fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
      return LM_ERROR;
    }
  }

  /* ---- options ---- */
  if(opts){
    tau=opts[0];
    eps1=opts[1];
    eps2=opts[2];
    eps2_sq=opts[2]*opts[2];
    eps3=opts[3];
  }
  else{ /* use default values */
    tau=LM_CNST(LM_INIT_MU);
    eps1=LM_CNST(LM_STOP_THRESH);
    eps2=LM_CNST(LM_STOP_THRESH);
    eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH);
    eps3=LM_CNST(LM_STOP_THRESH);
  }

  /* ---- working memory ---- */
  if(!work){
    worksz=LM_BC_DER_WORKSZ(m, n); /* 2*n+4*m + n*m + m*m; */
    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
      /* sp_pDp may already have been allocated above; release it so that
       * this error path does not leak (free(NULL) is a no-op)
       */
      free(sp_pDp);
      return LM_ERROR;
    }
    freework=1;
  }

  /* set up work arrays */
  e=work;
  hx=e + n;
  jacTe=hx + n;
  jac=jacTe + m;
  jacTjac=jac + nm;
  Dp=jacTjac + m*m;
  diag_jacTjac=Dp + m;
  pDp=diag_jacTjac + m;

  /* state handed to LNSRCH() so that it can evaluate func on its own */
  fstate.n=n;
  fstate.hx=hx;
  fstate.x=x;
  fstate.lb=lb;
  fstate.ub=ub;
  fstate.adata=adata;
  fstate.nfev=&nfev;

  /* see if starting point is within the feasible set */
  for(i=0; i<m; ++i)
    pDp[i]=p[i];
  BOXPROJECT(p, lb, ub, m); /* project to feasible set */
  for(i=0; i<m; ++i)
    if(pDp[i]!=p[i])
      fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_BC_DER) "()! [%g projected to %g]\n",
                      i, pDp[i], p[i]);

  /* compute e=x - f(p) and its L2 norm */
  (*func)(p, hx, m, n, adata); nfev=1;
  /* ### e=x-hx, p_eL2=||e|| */
#if 1
  p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n);
#else
  for(i=0, p_eL2=0.0; i<n; ++i){
    e[i]=tmp=x[i]-hx[i];
    p_eL2+=tmp*tmp;
  }
#endif
  init_p_eL2=p_eL2;
  if(!LM_FINITE(p_eL2)) stop=7;

  if(dscl){
    /* scale starting point and constraints */
    for(i=m; i-->0;  ) p[i]/=dscl[i];
    BOXSCALE(lb, ub, dscl, m, 1);
  }

  for(k=0; k<itmax && !stop; ++k){
    /* Note that p and e have been updated at a previous iteration */

    if(p_eL2<=eps3){ /* error is small */
      stop=6;
      break;
    }

    /* Compute the Jacobian J at p,  J^T J,  J^T e,  ||J^T e||_inf and ||p||^2.
     * Since J^T J is symmetric, its computation can be sped up by computing
     * only its upper triangular part and copying it to the lower part
     */

    if(!dscl){
      (*jacf)(p, jac, m, n, adata); ++njev;
    }
    else{
      /* jacf expects unscaled parameters */
      for(i=m; i-->0;  ) sp_pDp[i]=p[i]*dscl[i];
      (*jacf)(sp_pDp, jac, m, n, adata); ++njev;

      /* compute jac*D */
      for(i=n; i-->0;  ){
        register LM_REAL *jacim;

        jacim=jac+i*m;
        for(j=m; j-->0;  )
          jacim[j]*=dscl[j]; /* jac[i*m+j]*=dscl[j]; */
      }
    }

    /* J^T J, J^T e */
    if(nm<__BLOCKSZ__SQ){ /* this is a small problem */
      /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj.
       * Thus, the product J^T J can be computed using an outer loop for
       * l that adds J_li*J_lj to each element ij of the result. Note that
       * with this scheme, the accesses to J and JtJ are always along rows,
       * therefore induces less cache misses compared to the straightforward
       * algorithm for computing the product (i.e., l loop is innermost one).
       * A similar scheme applies to the computation of J^T e.
       * However, for large minimization problems (i.e., involving a large number
       * of unknowns and measurements) for which J/J^T J rows are too large to
       * fit in the L1 cache, even this scheme incurs many cache misses. In
       * such cases, a cache-efficient blocking scheme is preferable.
       *
       * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
       * performance problem.
       *
       * Note that the non-blocking algorithm is faster on small
       * problems since in this case it avoids the overheads of blocking.
       */
      register LM_REAL alpha, *jaclm, *jacTjacim; /* NOTE: this alpha shadows the line-search constant */

      /* looping downwards saves a few computations */
      for(i=m*m; i-->0;  )
        jacTjac[i]=0.0;
      for(i=m; i-->0;  )
        jacTe[i]=0.0;

      for(l=n; l-->0;  ){
        jaclm=jac+l*m;
        for(i=m; i-->0;  ){
          jacTjacim=jacTjac+i*m;
          alpha=jaclm[i]; /* jac[l*m+i]; */
          for(j=i+1; j-->0;  ) /* j<=i computes lower triangular part only */
            jacTjacim[j]+=jaclm[j]*alpha; /* jacTjac[i*m+j]+=jac[l*m+j]*alpha */

          /* J^T e */
          jacTe[i]+=alpha*e[l];
        }
      }

      for(i=m; i-->0;  ) /* copy to upper part */
        for(j=i+1; j<m; ++j)
          jacTjac[i*m+j]=jacTjac[j*m+i];
    }
    else{ /* this is a large problem */
      /* Cache efficient computation of J^T J based on blocking */
      LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);

      /* cache efficient computation of J^T e */
      for(i=0; i<m; ++i)
        jacTe[i]=0.0;

      for(i=0; i<n; ++i){
        register LM_REAL *jacrow;

        for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
          jacTe[l]+=jacrow[l]*tmp;
      }
    }

    /* Compute ||J^T e||_inf and ||p||^2. Note that ||J^T e||_inf
     * is computed for free (i.e. inactive) variables only.
     * At a local minimum, if p[i]==ub[i] then g[i]>0;
     * if p[i]==lb[i] g[i]<0; otherwise g[i]=0
     */
    for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i<m; ++i){
      if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; }
      else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; }
      else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;

      diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
      p_L2+=p[i]*p[i];
    }
    /* p_L2=sqrt(p_L2); */

#if 0
if(!(k%100)){
  printf("Current estimate: ");
  for(i=0; i<m; ++i)
    printf("%.9g ", p[i]);
  printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j);
}
#endif

    /* check for convergence */
    if(j==numactive && (jacTe_inf <= eps1)){
      Dp_L2=0.0; /* no increment for p in this case */
      stop=1;
      break;
    }

    /* compute initial damping factor */
    if(k==0){
      if(!lb && !ub){ /* no bounds */
        for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
          if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
        mu=tau*tmp;
      }
      else
        mu=LM_CNST(0.5)*tau*p_eL2; /* use Kanzow's starting mu */
    }

    /* determine increment using a combination of adaptive damping, line search and projected gradient search */
    while(1){
      /* augment normal equations */
      for(i=0; i<m; ++i)
        jacTjac[i*m+i]+=mu;

      /* solve augmented equations */
#ifdef HAVE_LAPACK
      /* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD.
       * For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest.
       * From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off;
       * QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but
       * slower than LDLt; LDLt offers a good tradeoff between robustness and speed
       */

      issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK;
      /* issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU; */
      /* issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL; */
#ifdef HAVE_PLASMA
      /* issolved=AX_EQ_B_PLASMA_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_PLASMA_CHOL; */
#endif
      /* issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR; */
      /* issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int (*)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m))AX_EQ_B_QRLS; */
      /* issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD; */

#else
      /* use the LU included with levmar */
      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU;
#endif /* HAVE_LAPACK */

      if(issolved){
        for(i=0; i<m; ++i)
          pDp[i]=p[i] + Dp[i];

        /* compute p's new estimate and ||Dp||^2 */
        BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
        for(i=0, Dp_L2=0.0; i<m; ++i){
          Dp[i]=tmp=pDp[i]-p[i];
          Dp_L2+=tmp*tmp;
        }
        /* Dp_L2=sqrt(Dp_L2); */

        if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
          stop=2;
          break;
        }

        if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */
          stop=4;
          break;
        }

        if(!dscl){
          (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
        }
        else{
          for(i=m; i-->0;  ) sp_pDp[i]=pDp[i]*dscl[i];
          (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
        }

        /* ### hx=x-hx, pDp_eL2=||hx|| */
#if 1
        pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
#else
        for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
          hx[i]=tmp=x[i]-hx[i];
          pDp_eL2+=tmp*tmp;
        }
#endif
        /* the following test ensures that the computation of pDp_eL2 has not overflowed.
         * Such an overflow does no harm here, thus it is not signalled as an error
         */
        if(!LM_FINITE(pDp_eL2) && !LM_FINITE(VECNORM(hx, n))){
          stop=7;
          break;
        }

        if(pDp_eL2<=gamma*p_eL2){
          for(i=0, dL=0.0; i<m; ++i)
            dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);

#if 1
          if(dL>0.0){
            dF=p_eL2-pDp_eL2;
            tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0));
            tmp=LM_CNST(1.0)-tmp*tmp*tmp;
            mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) );
          }
          else{
            tmp=LM_CNST(0.1)*pDp_eL2; /* pDp_eL2 is the new p_eL2 */
            mu=(mu>=tmp)? tmp : mu;
          }
#else

          tmp=LM_CNST(0.1)*pDp_eL2; /* pDp_eL2 is the new p_eL2 */
          mu=(mu>=tmp)? tmp : mu;
#endif

          nu=2;

          for(i=0 ; i<m; ++i) /* update p's estimate */
            p[i]=pDp[i];

          for(i=0; i<n; ++i) /* update e and ||e||_2 */
            e[i]=hx[i];
          p_eL2=pDp_eL2;
          ++nLMsteps;
          gprevtaken=0;
          break;
        }
        /* note that if the LM step is not taken, code falls through to the LM line search below */
      }
      else{

        /* the augmented linear system could not be solved, increase mu */

        mu*=nu;
        nu2=nu<<1; /* 2*nu; */
        if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
          stop=5;
          break;
        }
        nu=nu2;

        for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
          jacTjac[i*m+i]=diag_jacTjac[i];

        continue; /* solve again with increased nu */
      }

      /* if this point is reached, the LM step did not reduce the error;
       * see if it is a descent direction
       */

      /* negate jacTe (i.e. g) & compute g^T * Dp */
      for(i=0, jacTeDp=0.0; i<m; ++i){
        jacTe[i]=-jacTe[i];
        jacTeDp+=jacTe[i]*Dp[i];
      }

      if(jacTeDp<=-rho*pow(Dp_L2, LM_CNST(_POW_)/LM_CNST(2.0))){
        /* Dp is a descent direction; do a line search along it */
#if 1
        /* use Schnabel's backtracking line search; it requires fewer "func" evaluations */
        {
        int mxtake, iretcd;
        LM_REAL stepmx, steptl=LM_CNST(1e3)*(LM_REAL)sqrt(LM_REAL_EPSILON);

          tmp=(LM_REAL)sqrt(p_L2); stepmx=LM_CNST(1e3)*( (tmp>=LM_CNST(1.0))? tmp : LM_CNST(1.0) );

          LNSRCH(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, &fstate,
                 &mxtake, &iretcd, stepmx, steptl, dscl); /* NOTE: LNSRCH() updates hx */
          if(iretcd!=0 || !LM_FINITE(pDp_eL2)) goto gradproj; /* rather inelegant but effective way to handle LNSRCH() failures... */
        }
#else
        /* use the simpler (but slower!) line search described by Kanzow et al */
        for(t=tini; t>tmin; t*=beta){
          for(i=0; i<m; ++i)
            pDp[i]=p[i] + t*Dp[i];
          BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */

          if(!dscl){
            (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
          }
          else{
            for(i=m; i-->0;  ) sp_pDp[i]=pDp[i]*dscl[i];
            (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
          }

          /* compute ||e(pDp)||_2 */
          /* ### hx=x-hx, pDp_eL2=||hx|| */
#if 1
          pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
#else
          for(i=0, pDp_eL2=0.0; i<n; ++i){
            hx[i]=tmp=x[i]-hx[i];
            pDp_eL2+=tmp*tmp;
          }
#endif /* ||e(pDp)||_2 */
          if(!LM_FINITE(pDp_eL2)) goto gradproj; /* treat as line search failure */

          /* if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + t*alpha*jacTeDp) break; */
          if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*t*alpha*jacTeDp) break;
        }
#endif /* line search alternatives */

        ++nLSsteps;
        gprevtaken=0;

        /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2.
         * These values are used below to update their corresponding variables
         */
      }
      else{
        /* Note that this point can also be reached via a goto when LNSRCH() fails. */
gradproj:

        /* jacTe has been negated above. Being a descent direction, it is next used
         * to make a projected gradient step
         */

        /* compute ||g|| */
        for(i=0, tmp=0.0; i<m; ++i)
          tmp+=jacTe[i]*jacTe[i];
        tmp=(LM_REAL)sqrt(tmp);
        tmp=LM_CNST(100.0)/(LM_CNST(1.0)+tmp);
        t0=(tmp<=tini)? tmp : tini; /* guard against poor scaling & large steps; see (3.50) in C.T. Kelley's book */

        /* if the previous step was along the gradient descent, try to use the t employed in that step */
        for(t=(gprevtaken)? t : t0; t>tming; t*=beta){
          for(i=0; i<m; ++i)
            pDp[i]=p[i] - t*jacTe[i];
          BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
          for(i=0, Dp_L2=0.0; i<m; ++i){
            Dp[i]=tmp=pDp[i]-p[i];
            Dp_L2+=tmp*tmp;
          }

          if(!dscl){
            (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
          }
          else{
            for(i=m; i-->0;  ) sp_pDp[i]=pDp[i]*dscl[i];
            (*func)(sp_pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
          }

          /* compute ||e(pDp)||_2 */
          /* ### hx=x-hx, pDp_eL2=||hx|| */
#if 1
          pDp_eL2=LEVMAR_L2NRMXMY(hx, x, hx, n);
#else
          for(i=0, pDp_eL2=0.0; i<n; ++i){
            hx[i]=tmp=x[i]-hx[i];
            pDp_eL2+=tmp*tmp;
          }
#endif
          /* the following test ensures that the computation of pDp_eL2 has not overflowed.
           * Such an overflow does no harm here, thus it is not signalled as an error
           */
          if(!LM_FINITE(pDp_eL2) && !LM_FINITE(VECNORM(hx, n))){
            stop=7;
            goto breaknested;
          }

          /* compute ||g^T * Dp||. Note that if pDp has not been altered by projection
           * (i.e. BOXPROJECT), jacTeDp=-t*||g||^2
           */
          for(i=0, jacTeDp=0.0; i<m; ++i)
            jacTeDp+=jacTe[i]*Dp[i];

          if(gprevtaken && pDp_eL2<=p_eL2 + LM_CNST(2.0)*LM_CNST(0.99999)*jacTeDp){ /* starting t too small */
            t=t0;
            gprevtaken=0;
            continue;
          }
          /* if(LM_CNST(0.5)*pDp_eL2<=LM_CNST(0.5)*p_eL2 + alpha*jacTeDp) terminatePGLS; */
          if(pDp_eL2<=p_eL2 + LM_CNST(2.0)*alpha*jacTeDp) goto terminatePGLS;

          /* if(pDp_eL2<=p_eL2 - LM_CNST(2.0)*alpha/t*Dp_L2) goto terminatePGLS; */ /* sufficient decrease condition proposed by Kelley in (5.13) */
        }

        /* if this point is reached then the gradient line search has failed */
        gprevtaken=0;
        break;

terminatePGLS:

        ++nPGsteps;
        gprevtaken=1;
        /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2 */
      }

      /* update using computed values */

      for(i=0, Dp_L2=0.0; i<m; ++i){
        tmp=pDp[i]-p[i];
        Dp_L2+=tmp*tmp;
      }
      /* Dp_L2=sqrt(Dp_L2); */

      if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
        stop=2;
        break;
      }

      for(i=0 ; i<m; ++i) /* update p's estimate */
        p[i]=pDp[i];

      for(i=0; i<n; ++i) /* update e and ||e||_2 */
        e[i]=hx[i];
      p_eL2=pDp_eL2;
      break;
    } /* inner loop */
  }

breaknested: /* NOTE: this point is also reached via an explicit goto! */

  if(k>=itmax) stop=3;

  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
    jacTjac[i*m+i]=diag_jacTjac[i];

  if(info){
    info[0]=init_p_eL2;
    info[1]=p_eL2;
    info[2]=jacTe_inf;
    info[3]=Dp_L2;
    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
    info[4]=mu/tmp;
    info[5]=(LM_REAL)k;
    info[6]=(LM_REAL)stop;
    info[7]=(LM_REAL)nfev;
    info[8]=(LM_REAL)njev;
    info[9]=(LM_REAL)nlss;
  }

  /* covariance matrix */
  if(covar){
    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);

    if(dscl){ /* correct for the scaling */
      for(i=m; i-->0;  )
        for(j=m; j-->0;  )
          covar[i*m+j]*=(dscl[i]*dscl[j]);
    }
  }

  if(freework) free(work);

#ifdef LINSOLVERS_RETAIN_MEMORY
  if(linsolver) (*linsolver)(NULL, NULL, NULL, 0);
#endif

#if 0
printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps);
#endif

  if(dscl){
    /* scale final point and constraints */
    for(i=0; i<m; ++i) p[i]*=dscl[i];
    BOXSCALE(lb, ub, dscl, m, 0);
    free(sp_pDp);
  }

  return (stop!=4 && stop!=7)?  k : LM_ERROR;
}
/// @brief Minimizes the given factors over the given variables with levmar's
///        Levenberg-Marquardt solver (dlevmar_der, analytic Jacobian).
///
/// @param vars       variables forming the subspace being optimized
/// @param factors    factors whose values are minimized
/// @param xval       I/O: on entry the starting point; on return the values
///                   read back from vars[i]->eval() after the solver result
///                   has been assigned through LMSSOpt::quickAssignVals()
/// @param deltaFval  O: final objective minus initial objective, both taken
///                   from f.evalFactors( factors, ... )
/// @param printdbg   when true, problem size, timing and solver termination
///                   info are written to std::cout
/// @return the objective value after optimization
///
/// When built without USE_LEVMAR this prints an error and terminates the
/// process via std::exit( -1 ).
Numeric LMSubspaceOptimizer::optimize( const VariablePtrVec & vars,
                                       const FactorPtrVec & factors,
                                       NumericVec & xval,
                                       Numeric & deltaFval,
                                       const bool printdbg )
{
  //  // box-constrained minimization
  //  extern int dlevmar_bc_der(
  //         void (*func)(double *p, double *hx, int m, int n, void *adata),
  //         void (*jacf)(double *p, double *j, int m, int n, void *adata),
  //         double *p, double *x, int m, int n, double *lb, double *ub, double *dscl,
  //         int itmax, double *opts, double *info, double *work, double *covar,
  //         void *adata);

//#define USE_LEVMAR
#ifdef USE_LEVMAR
  const Clock::time_point starttime = Clock::now();

  //  initstate.assign( xval.begin(), xval.end() );

  LMSSOpt::AuxData aux( vars, factors, f, pgtemp );

  // levmar needs at least as many measurements as unknowns, so n is padded
  // up to m when there are fewer factors than variables.
  const int m = vars.size();
  const int n = std::max( factors.size(), (size_t) m );

  initeval.resize( n );
  finaleval.resize( n );

  //  double lb[ m ];
  //  double ub[ m ];
  //
  //  for ( size_t i = 0; i < vars.size(); ++i ) {
  //    NumericInterval dom = vars[i]->getDomain().interval();
  //    lb[i] = ( dom.lower() <= -DBL_MAX ? DBL_MAX : dom.lower() );
  //    ub[i] = ( dom.upper() >= DBL_MAX ? DBL_MAX : dom.upper() );
  //  }

  double * dscl = NULL;
  double opts[ LM_OPTS_SZ ];
  //  double * opts = NULL;
  double info[ LM_INFO_SZ ];
  double * work = new double[ LM_BC_DER_WORKSZ( m, n ) ];
  double * covar = NULL;

  if ( printdbg ) {
    std::cout << "LM SS opt m=" << m << ", n=" << n << " (" << vars.size()
              << " vars, " << factors.size() << " factors)" << std::endl;
  }

  //  LMSSOpt::evalFunc( xval.data(), initeval.data(), m, n, &aux );
  //  const Numeric ival = NumericVecOps::dot( initeval, initeval ) / 2.0;
  Numeric ferr = 0.0;
  const Numeric ival = f.evalFactors( factors, ferr );

  opts[0] = 1e-3;  // initial \mu scale factor
  opts[1] = 1e-15; // stopping thresh. for ||J^T e||_inf
  opts[2] = 1e-15; // stopping thresh. for ||Dp||_2
  opts[3] = ftol;  // stopping thresh. for ||e||_2

  //  int niters = dlevmar_bc_der(
  //      &LMSSOpt::evalFunc,
  //      &LMSSOpt::evalJacf,
  //      xval.data(), NULL, xval.size(), n, lb, ub,
  //      NULL, maxiters, opts, info, work, covar, &aux );

  int niters = dlevmar_der(
      &LMSSOpt::evalFunc,
      &LMSSOpt::evalJacf,
      xval.data(), NULL, xval.size(), n,
      maxiters, opts, info, work, covar, &aux );

  // sanitize values to be within the domain of this variable (note that they
  // are sanitized in quickAssign(), so just copy them out after)
  LMSSOpt::quickAssignVals( aux, m, xval.data() );
  for ( size_t i = 0; i < vars.size(); ++i ) {
    xval[i] = vars[i]->eval();
  }

  //  LMSSOpt::evalFunc( xval.data(), finaleval.data(), m, n, &aux );
  //  const Numeric fval = NumericVecOps::dot( finaleval, finaleval ) / 2.0;
  Numeric fval = f.evalFactors( factors, ferr );

  //  Numeric asdf = 0;
  //  for ( size_t i = 0; i < factors.size(); ++i ) {
  //    asdf += factors[i]->evalNoCache();
  //    std::cout << i << ": f " << factors[i]->getID() << " diff " <<
  //        ( finaleval[i]*finaleval[i]/2.0 - factors[i]->eval() ) << " ("
  //        << factors[i]->eval() << ")" << std::endl;
  //  }
  //  std::cout << "asdf: " << asdf << std::endl;
  //
  //  std::cout << "f.eval " << f.eval() << std::endl;
  //  Numeric ferr = 0.0;
  //  std::cout << "f fact eval " << f.evalFactors( factors, ferr ) << std::endl;
  //
  //  std::cout << "xval: " << xval << std::endl;

  //  double err[n];
  //  dlevmar_chkjac( &LMSSOpt::evalFunc, &LMSSOpt::evalJacf, xval.data(), m, n,
  //      &aux, err );
  //
  //  for ( size_t i = 0; i < m; ++i ) {
  //    if ( err[i] < 0.5 ) {
  //      std::cout << i << ": var " << vars[i]->getID() << " has jac err "
  //          << err[i] << std::endl;
  //    }
  //  }

  // work was allocated with new[], so it must be released with delete[];
  // a scalar delete here is undefined behaviour.
  delete[] work;

  deltaFval = ( fval - ival);

  const Duration dur = ( Clock::now() - starttime );

  if ( printdbg ) {
    std::cout << "LM SS opt returned " << fval << " (init " << ival
              << ", diff " << deltaFval << ") after " << niters
              << " iterations in " << dur.count() << " seconds" << std::endl;
    std::cout << "LM SS opt info -- termination: " << info[6]
              << ", #fevals: " << info[7]
              << ", #jevals: " << info[8]
              << ", #linsolves: " << info[9] << std::endl;
  }

  //  if ( deltaFval > 0 ) {
  //    fval = ival;
  //    xval.assign( initstate.begin(), initstate.end() );
  //    deltaFval = 0;
  //    std::cout << "LM SS made no progress -- returning initial state" << std::endl;
  //  }

  return fval;

#else // USE_LEVMAR
  std::cout << "ERROR: Cannot use LM subspace optimizer without levmar.h."
            << " Rebuild with -DUSE_LEVMAR and link to liblevmar." << std::endl;
  std::exit( -1 );
  return 0;
#endif // USE_LEVMAR
}