extern "C" LEVMARDLL_API void Rhofit(BoxReflSettings* InitStruct, double parameters[], double covariance[], int parametersize, double info[]) { USES_CONVERSION; double ChiSquare = 0; double opts[LM_OPTS_SZ]; double* xvec = new double[InitStruct->ZLength] ; double *work, *covar; opts[0]=LM_INIT_MU; opts[1]=1E-15; opts[2]=1E-15; opts[3]=1E-20; opts[4]=-LM_DIFF_DELTA; // relevant only if the finite difference jacobian version is used RhoCalc Rho; Rho.init(InitStruct); //Allocate a dummy array - Our real calculation is done in Refl.objective memset(xvec, 0, InitStruct->ZLength*sizeof(double)); //Allocate workspace and our covariance matrix work=new double[((LM_DIF_WORKSZ(parametersize, InitStruct->ZLength)+parametersize*InitStruct->ZLength))]; covar=work+LM_DIF_WORKSZ(parametersize, InitStruct->ZLength); dlevmar_dif(Rho.objective, parameters, xvec, parametersize,InitStruct->ZLength, 1000, opts, info, work, covar,(void*)(&Rho)); //Calculate the standard deviations in the parameters for(int i = 0; i< parametersize;i++) { covariance[i] = sqrt(covar[i*(parametersize+1)]); } delete xvec; delete work; }
extern "C" LEVMARDLL_API void FastReflfit(BoxReflSettings* InitStruct, double m_cParamVec[], double covariance[], int paramsize, double info[]) { USES_CONVERSION; //Variables double *work, *covar; double ChiSquare = 0; double Qc = 0; double calcholder = 0; FastReflcalc Refl; Refl.init(InitStruct); //Setup the fit //opts[4] is relevant only if the finite difference jacobian version is used double opts[LM_OPTS_SZ]; opts[0]=LM_INIT_MU; opts[1]=1E-15; opts[2]=1E-15; opts[3]=1E-20; opts[4]=-LM_DIFF_DELTA; //Allocate a dummy array - Our real calculation is done in Refl.objective double* xvec = new double[InitStruct->QPoints] ; memset(xvec, 0, InitStruct->QPoints*sizeof(double)); //Allocate workspace and our covariance matrix work=new double[((LM_DIF_WORKSZ(paramsize, InitStruct->QPoints)+paramsize*InitStruct->QPoints))]; covar=work+LM_DIF_WORKSZ(paramsize, InitStruct->QPoints); if(InitStruct->UL == NULL) dlevmar_dif(Refl.objective,m_cParamVec, xvec, paramsize,InitStruct->QPoints, 1000, opts, info, work, covar,(void*)(&Refl)); else dlevmar_bc_dif(Refl.objective, m_cParamVec, xvec, paramsize,InitStruct->QPoints, InitStruct->LL,InitStruct->UL,1000, opts, info, work, covar,(void*)(&Refl)); for(int i = 0; i< paramsize;i++) { covariance[i] = sqrt(covar[i*(paramsize+1)]); } delete[] xvec; delete[] work; }
int main() { register int i, j; int problem, ret; double p[5], // 5 is max(2, 3, 5) x[16]; // 16 is max(2, 3, 5, 6, 16) int m, n; double opts[LM_OPTS_SZ], info[LM_INFO_SZ]; char *probname[]={ "Rosenbrock function", "modified Rosenbrock problem", "Powell's function", "Wood's function", "Meyer's (reformulated) problem", "Osborne's problem", "helical valley function", "Boggs & Tolle's problem #3", "Hock - Schittkowski problem #28", "Hock - Schittkowski problem #48", "Hock - Schittkowski problem #51", "Hock - Schittkowski problem #01", "Hock - Schittkowski modified problem #21", "hatfldb problem", "hatfldc problem", "equilibrium combustion problem", "Hock - Schittkowski modified #1 problem #52", "Schittkowski modified problem #235", "Boggs & Tolle modified problem #7", "Hock - Schittkowski modified #2 problem #52", "Hock - Schittkowski modified problem #76", }; opts[0]=LM_INIT_MU; opts[1]=1E-15; opts[2]=1E-15; opts[3]=1E-20; opts[4]= LM_DIFF_DELTA; // relevant only if the Jacobian is approximated using finite differences; specifies forward differencing //opts[4]=-LM_DIFF_DELTA; // specifies central differencing to approximate Jacobian; more accurate but more expensive to compute! /* uncomment the appropriate line below to select a minimization problem */ problem= //0; // Rosenbrock function //1; // modified Rosenbrock problem //2; // Powell's function //3; // Wood's function 4; // Meyer's (reformulated) problem //5; // Osborne's problem //6; // helical valley function #ifdef HAVE_LAPACK //7; // Boggs & Tolle's problem 3 //8; // Hock - Schittkowski problem 28 //9; // Hock - Schittkowski problem 48 //10; // Hock - Schittkowski problem 51 #else // no LAPACK #ifdef _MSC_VER #pragma message("LAPACK not available, some test problems cannot be used") #else #warning LAPACK not available, some test problems cannot be used #endif // _MSC_VER #endif /* HAVE_LAPACK */ //11; // Hock - Schittkowski problem 01 //12; // Hock - Schittkowski modified problem 21 //13; // hatfldb problem //14; // hatfldc problem //15; // equilibrium combustion problem #ifdef HAVE_LAPACK //16; // Hock - Schittkowski modified #1 problem 52 //17; // Schittkowski modified problem 235 //18; // Boggs & Tolle modified problem #7 //19; // Hock - Schittkowski modified #2 problem 52 //20; // Hock - Schittkowski modified problem #76" #endif /* HAVE_LAPACK */ switch(problem){ default: fprintf(stderr, "unknown problem specified (#%d)! Note that some minimization problems require LAPACK.\n", problem); exit(1); break; case 0: /* Rosenbrock function */ m=2; n=2; p[0]=-1.2; p[1]=1.0; for(i=0; i<n; i++) x[i]=0.0; ret=dlevmar_der(ros, jacros, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian //ret=dlevmar_dif(ros, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian break; case 1: /* modified Rosenbrock problem */ m=2; n=3; p[0]=-1.2; p[1]=1.0; for(i=0; i<n; i++) x[i]=0.0; ret=dlevmar_der(modros, jacmodros, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian //ret=dlevmar_dif(modros, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian break; case 2: /* Powell's function */ m=2; n=2; p[0]=3.0; p[1]=1.0; for(i=0; i<n; i++) x[i]=0.0; ret=dlevmar_der(powell, jacpowell, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian //ret=dlevmar_dif(powell, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian break; case 3: /* Wood's function */ m=4; n=6; p[0]=-3.0; p[1]=-1.0; p[2]=-3.0; p[3]=-1.0; for(i=0; i<n; i++) x[i]=0.0; ret=dlevmar_dif(wood, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian break; case 4: /* Meyer's data fitting problem */ m=3; n=16; p[0]=8.85; p[1]=4.0; p[2]=2.5; x[0]=34.780; x[1]=28.610; x[2]=23.650; x[3]=19.630; x[4]=16.370; x[5]=13.720; x[6]=11.540; x[7]=9.744; x[8]=8.261; x[9]=7.030; x[10]=6.005; x[11]=5.147; x[12]=4.427; x[13]=3.820; x[14]=3.307; x[15]=2.872; //ret=dlevmar_der(meyer, jacmeyer, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double *work, *covar; work=malloc((LM_DIF_WORKSZ(m, n)+m*m)*sizeof(double)); if(!work){ fprintf(stderr, "memory allocation request failed in main()\n"); exit(1); } covar=work+LM_DIF_WORKSZ(m, n); ret=dlevmar_dif(meyer, p, x, m, n, 1000, opts, info, work, covar, NULL); // no Jacobian, caller allocates work memory, covariance estimated printf("Covariance of the fit:\n"); for(i=0; i<m; ++i){ for(j=0; j<m; ++j) printf("%g ", covar[i*m+j]); printf("\n"); } printf("\n"); free(work); } /* uncomment the following block to verify Jacobian */ /* { double err[16]; dlevmar_chkjac(meyer, jacmeyer, p, m, n, NULL, err); for(i=0; i<n; ++i) printf("gradient %d, err %g\n", i, err[i]); } */ break; case 5: /* Osborne's data fitting problem */ { double x33[]={ 8.44E-1, 9.08E-1, 9.32E-1, 9.36E-1, 9.25E-1, 9.08E-1, 8.81E-1, 8.5E-1, 8.18E-1, 7.84E-1, 7.51E-1, 7.18E-1, 6.85E-1, 6.58E-1, 6.28E-1, 6.03E-1, 5.8E-1, 5.58E-1, 5.38E-1, 5.22E-1, 5.06E-1, 4.9E-1, 4.78E-1, 4.67E-1, 4.57E-1, 4.48E-1, 4.38E-1, 4.31E-1, 4.24E-1, 4.2E-1, 4.14E-1, 4.11E-1, 4.06E-1}; m=5; n=33; p[0]=0.5; p[1]=1.5; p[2]=-1.0; p[3]=1.0E-2; p[4]=2.0E-2; ret=dlevmar_der(osborne, jacosborne, p, x33, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian //ret=dlevmar_dif(osborne, p, x33, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian } break; case 6: /* helical valley function */ m=3; n=3; p[0]=-1.0; p[1]=0.0; p[2]=0.0; for(i=0; i<n; i++) x[i]=0.0; ret=dlevmar_der(helval, jachelval, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian //ret=dlevmar_dif(helval, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // no Jacobian break; #ifdef HAVE_LAPACK case 7: /* Boggs-Tolle problem 3 */ m=5; n=5; p[0]=2.0; p[1]=2.0; p[2]=2.0; p[3]=2.0; p[4]=2.0; for(i=0; i<n; i++) x[i]=0.0; { double A[3*5]={1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, -2.0, 0.0, 1.0, 0.0, 0.0, -1.0}, b[3]={0.0, 0.0, 0.0}; ret=dlevmar_lec_der(bt3, jacbt3, p, x, m, n, A, b, 3, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, analytic Jacobian //ret=dlevmar_lec_dif(bt3, p, x, m, n, A, b, 3, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, no Jacobian } break; case 8: /* Hock - Schittkowski problem 28 */ m=3; n=3; p[0]=-4.0; p[1]=1.0; p[2]=1.0; for(i=0; i<n; i++) x[i]=0.0; { double A[1*3]={1.0, 2.0, 3.0}, b[1]={1.0}; ret=dlevmar_lec_der(hs28, jachs28, p, x, m, n, A, b, 1, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, analytic Jacobian //ret=dlevmar_lec_dif(hs28, p, x, m, n, A, b, 1, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, no Jacobian } break; case 9: /* Hock - Schittkowski problem 48 */ m=5; n=5; p[0]=3.0; p[1]=5.0; p[2]=-3.0; p[3]=2.0; p[4]=-2.0; for(i=0; i<n; i++) x[i]=0.0; { double A[2*5]={1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, -2.0, -2.0}, b[2]={5.0, -3.0}; ret=dlevmar_lec_der(hs48, jachs48, p, x, m, n, A, b, 2, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, analytic Jacobian //ret=dlevmar_lec_dif(hs48, p, x, m, n, A, b, 2, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, no Jacobian } break; case 10: /* Hock - Schittkowski problem 51 */ m=5; n=5; p[0]=2.5; p[1]=0.5; p[2]=2.0; p[3]=-1.0; p[4]=0.5; for(i=0; i<n; i++) x[i]=0.0; { double A[3*5]={1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, -2.0, 0.0, 1.0, 0.0, 0.0, -1.0}, b[3]={4.0, 0.0, 0.0}; ret=dlevmar_lec_der(hs51, jachs51, p, x, m, n, A, b, 3, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, analytic Jacobian //ret=dlevmar_lec_dif(hs51, p, x, m, n, A, b, 3, 1000, opts, info, NULL, NULL, NULL); // lin. constraints, no Jacobian } break; #endif /* HAVE_LAPACK */ case 11: /* Hock - Schittkowski problem 01 */ m=2; n=2; p[0]=-2.0; p[1]=1.0; for(i=0; i<n; i++) x[i]=0.0; //ret=dlevmar_der(hs01, jachs01, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double lb[2], ub[2]; lb[0]=-DBL_MAX; lb[1]=-1.5; ub[0]=ub[1]=DBL_MAX; ret=dlevmar_bc_der(hs01, jachs01, p, x, m, n, lb, ub, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian } break; case 12: /* Hock - Schittkowski (modified) problem 21 */ m=2; n=2; p[0]=-1.0; p[1]=-1.0; for(i=0; i<n; i++) x[i]=0.0; //ret=dlevmar_der(hs21, jachs21, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double lb[2], ub[2]; lb[0]=2.0; lb[1]=-50.0; ub[0]=50.0; ub[1]=50.0; ret=dlevmar_bc_der(hs21, jachs21, p, x, m, n, lb, ub, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian } break; case 13: /* hatfldb problem */ m=4; n=4; p[0]=p[1]=p[2]=p[3]=0.1; for(i=0; i<n; i++) x[i]=0.0; //ret=dlevmar_der(hatfldb, jachatfldb, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double lb[4], ub[4]; lb[0]=lb[1]=lb[2]=lb[3]=0.0; ub[0]=ub[2]=ub[3]=DBL_MAX; ub[1]=0.8; ret=dlevmar_bc_der(hatfldb, jachatfldb, p, x, m, n, lb, ub, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian } break; case 14: /* hatfldc problem */ m=4; n=4; p[0]=p[1]=p[2]=p[3]=0.9; for(i=0; i<n; i++) x[i]=0.0; //ret=dlevmar_der(hatfldc, jachatfldc, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double lb[4], ub[4]; lb[0]=lb[1]=lb[2]=lb[3]=0.0; ub[0]=ub[1]=ub[2]=ub[3]=10.0; ret=dlevmar_bc_der(hatfldc, jachatfldc, p, x, m, n, lb, ub, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian } break; case 15: /* equilibrium combustion problem */ m=5; n=5; p[0]=p[1]=p[2]=p[3]=p[4]=0.0001; for(i=0; i<n; i++) x[i]=0.0; //ret=dlevmar_der(combust, jaccombust, p, x, m, n, 1000, opts, info, NULL, NULL, NULL); // with analytic Jacobian { double lb[5], ub[5]; lb[0]=lb[1]=lb[2]=lb[3]=lb[4]=0.0001; ub[0]=ub[1]=ub[2]=ub[3]=ub[4]=100.0; ret=dlevmar_bc_der(combust, jaccombust, p, x, m, n, lb, ub, 5000, opts, info, NULL, NULL, NULL); // with analytic Jacobian } break; #ifdef HAVE_LAPACK case 16: /* Hock - Schittkowski modified #1 problem 52 */ m=5; n=4; p[0]=2.0; p[1]=2.0; p[2]=2.0; p[3]=2.0; p[4]=2.0; for(i=0; i<n; i++) x[i]=0.0; { double A[3*5]={1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, -2.0, 0.0, 1.0, 0.0, 0.0, -1.0}, b[3]={0.0, 0.0, 0.0}; double lb[5], ub[5]; double weights[5]={2000.0, 2000.0, 2000.0, 2000.0, 2000.0}; // penalty terms weights lb[0]=-0.09; lb[1]=0.0; lb[2]=-DBL_MAX; lb[3]=-0.2; lb[4]=0.0; ub[0]=DBL_MAX; ub[1]=0.3; ub[2]=0.25; ub[3]=0.3; ub[4]=0.3; ret=dlevmar_blec_der(mod1hs52, jacmod1hs52, p, x, m, n, lb, ub, A, b, 3, weights, 1000, opts, info, NULL, NULL, NULL); // box & lin. constraints, analytic Jacobian //ret=dlevmar_blec_dif(mod1hs52, p, x, m, n, lb, ub, A, b, 3, weights, 1000, opts, info, NULL, NULL, NULL); // box & lin. constraints, no Jacobian } break; case 17: /* Schittkowski modified problem 235 */ m=3; n=2; p[0]=-2.0; p[1]=3.0; p[2]=1.0; for(i=0; i<n; i++) x[i]=0.0; { double A[2*3]={1.0, 0.0, 1.0, 0.0, 1.0, -4.0}, b[2]={-1.0, 0.0}; double lb[3], ub[3]; lb[0]=-DBL_MAX; lb[1]=0.1; lb[2]=0.7; ub[0]=DBL_MAX; ub[1]=2.9; ub[2]=DBL_MAX; ret=dlevmar_blec_der(mods235, jacmods235, p, x, m, n, lb, ub, A, b, 2, NULL, 1000, opts, info, NULL, NULL, NULL); // box & lin. constraints, analytic Jacobian //ret=dlevmar_blec_dif(mods235, p, x, m, n, lb, ub, A, b, 2, NULL, 1000, opts, info, NULL, NULL, NULL); // box & lin. constraints, no Jacobian } break; case 18: /* Boggs & Tolle modified problem 7 */ m=5; n=5; p[0]=-2.0; p[1]=1.0; p[2]=1.0; p[3]=1.0; p[4]=1.0; for(i=0; i<n; i++) x[i]=0.0; { double A[3*5]={1.0, 1.0, -1.0, 0.0, 0.0, 1.0, 1.0, 0.0, -1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0}, b[3]={1.0, 0.0, 0.5}; double lb[5], ub[5]; lb[0]=-DBL_MAX; lb[1]=-DBL_MAX; lb[2]=-DBL_MAX; lb[3]=-DBL_MAX; lb[4]=-0.3; ub[0]=0.7; ub[1]= DBL_MAX; ub[2]= DBL_MAX; ub[3]= DBL_MAX; ub[4]=DBL_MAX; ret=dlevmar_blec_der(modbt7, jacmodbt7, p, x, m, n, lb, ub, A, b, 3, NULL, 1000, opts, info, NULL, NULL, NULL); // box & lin. constraints, analytic Jacobian //ret=dlevmar_blec_dif(modbt7, p, x, m, n, lb, ub, A, b, 3, NULL, 10000, opts, info, NULL, NULL, NULL); // box & lin. constraints, no Jacobian } break; case 19: /* Hock - Schittkowski modified #2 problem 52 */ m=5; n=5; p[0]=2.0; p[1]=2.0; p[2]=2.0; p[3]=2.0; p[4]=2.0; for(i=0; i<n; i++) x[i]=0.0; { double C[3*5]={1.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, -2.0, 0.0, -1.0, 0.0, 0.0, 1.0}, d[3]={-1.0, -2.0, -7.0}; ret=dlevmar_bleic_der(mod2hs52, jacmod2hs52, p, x, m, n, NULL, NULL, NULL, NULL, 0, C, d, 3, 1000, opts, info, NULL, NULL, NULL); // lin. ineq. constraints, analytic Jacobian //ret=dlevmar_bleic_dif(mod2hs52, p, x, m, n, NULL, NULL, NULL, NULL, 0, C, d, 3, 1000, opts, info, NULL, NULL, NULL); // lin. ineq. constraints, no Jacobian } break; case 20: /* Hock - Schittkowski modified problem 76 */ m=4; n=4; p[0]=0.5; p[1]=0.5; p[2]=0.5; p[3]=0.5; for(i=0; i<n; i++) x[i]=0.0; { double A[1*4]={0.0, 1.0, 4.0, 0.0}, b[1]={1.5}; double C[2*4]={-1.0, -2.0, -1.0, -1.0, -3.0, -1.0, -2.0, 1.0}, d[2]={-5.0, -0.4}; double lb[4]={0.0, 0.0, 0.0, 0.0}; ret=dlevmar_bleic_der(modhs76, jacmodhs76, p, x, m, n, lb, NULL, A, b, 1, C, d, 2, 1000, opts, info, NULL, NULL, NULL); // lin. ineq. constraints, analytic Jacobian //ret=dlevmar_bleic_dif(modhs76, p, x, m, n, lb, NULL, A, b, 1, C, d, 2, 1000, opts, info, NULL, NULL, NULL); // lin. ineq. constraints, no Jacobian /* variations: * if no lb is used, the minimizer is (-0.1135922 0.1330097 0.3417476 0.07572816) * if the rhs of constr2 is 4.0, the minimizer is (0.0, 0.166667, 0.333333, 0.0) */ } break; #endif /* HAVE_LAPACK */ } /* switch */ printf("Results for %s:\n", probname[problem]); printf("Levenberg-Marquardt returned %d in %g iter, reason %g\nSolution: ", ret, info[5], info[6]); for(i=0; i<m; ++i) printf("%.7g ", p[i]); printf("\n\nMinimization info:\n"); for(i=0; i<LM_INFO_SZ; ++i) printf("%g ", info[i]); printf("\n"); return 0; }
/* Secant version of the LEVMAR_DER() function above: the jacobian is approximated with * the aid of finite differences (forward or central, see the comment for the opts argument) */ int LEVMAR_DIF( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ int (*visf)(LM_REAL *p, LM_REAL *hx, int m, int n, int iter, LM_REAL p_eL2, void *adata), /* visualisation function, can be used to print optimisation progress. If 0 is returned, the optimisation is stopped, and the current estimate will be used. */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[5], /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and * the step used in difference approximation to the jacobian. Set to NULL for defaults to be used. * If \delta<0, the jacobian is approximated with central differences which are more accurate * (but slower!) compared to the forward differences employed by default. */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * 7 - stopped by user * info[7]= # function evaluations * info[8]= # jacobian evaluations */ LM_REAL *work, /* working memory, allocate if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func. * Set to NULL if not needed */ { register int i, j, k, l; int worksz, freework=0, issolved; /* temp work arrays */ LM_REAL *e, /* nx1 */ *hx, /* \hat{x}_i, nx1 */ *jacTe, /* J^T e_i mx1 */ *jac, /* nxm */ *jacTjac, /* mxm */ *Dp, /* mx1 */ *diag_jacTjac, /* diagonal of J^T J, mx1 */ *pDp, /* p + Dp, mx1 */ *wrk; /* nx1 */ int using_ffdif=1; LM_REAL *wrk2=NULL; /* nx1, used for differentiating with central differences only */ register LM_REAL mu, /* damping constant */ tmp; /* mainly used in matrix & vector multiplications */ LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */ LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL; LM_REAL tau, eps1, eps2, eps2_sq, eps3, delta; LM_REAL init_p_eL2; int nu, nu2, stop, nfev, njap=0, K=(m>=10)? m: 10, updjac, updp=1, newjac; const int nm=n*m; mu=jacTe_inf=p_L2=0.0; /* -Wall */ stop=updjac=newjac=0; /* -Wall */ if(n<m){ fprintf(stderr, LCAT(LEVMAR_DIF, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m); return -1; } if(opts){ tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3]; delta=opts[4]; if(delta<0.0){ delta=-delta; /* make positive */ using_ffdif=0; /* use central differencing */ wrk2=(LM_REAL *)malloc(n*sizeof(LM_REAL)); if(!wrk2){ fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request for 'wrk2' failed\n")); return -1; } } } else{ // use default values tau=CNST(LM_INIT_MU); eps1=CNST(LM_STOP_THRESH); eps2=CNST(LM_STOP_THRESH); eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH); eps3=CNST(LM_STOP_THRESH); delta=CNST(LM_DIFF_DELTA); } if(!work){ worksz=LM_DIF_WORKSZ(m, n); //3*n+4*m + n*m + m*m; work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */ if(!work){ fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request failed\n")); return -1; } freework=1; } /* set up work arrays */ e=work; hx=e + n; jacTe=hx + n; jac=jacTe + m; jacTjac=jac + nm; Dp=jacTjac + m*m; diag_jacTjac=Dp + m; pDp=diag_jacTjac + m; wrk=pDp + m; /* compute e=x - f(p) and its L2 norm */ (*func)(p, hx, m, n, adata); nfev=1; for(i=0, p_eL2=0.0; i<n; ++i){ e[i]=tmp=x[i]-hx[i]; p_eL2+=tmp*tmp; } init_p_eL2=p_eL2; nu=20; /* force computation of J */ for(k=0; k<itmax; ++k){ /* Note that p and e have been updated at a previous iteration */ if(p_eL2<=eps3){ /* error is small */ stop=6; break; } /* Compute the jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2. * The symmetry of J^T J is again exploited for speed */ if((updp && nu>16) || updjac==K){ /* compute difference approximation to J */ if(using_ffdif){ /* use forward differences */ FDIF_FORW_JAC_APPROX(func, p, hx, wrk, delta, jac, m, n, adata); ++njap; nfev+=m; } else{ /* use central differences */ FDIF_CENT_JAC_APPROX(func, p, wrk, wrk2, delta, jac, m, n, adata); ++njap; nfev+=2*m; } nu=2; updjac=0; updp=0; newjac=1; } if(newjac){ /* jacobian has changed, recompute J^T J, J^t e, etc */ newjac=0; /* J^T J, J^T e */ if(nm<=__BLOCKSZ__SQ){ // this is a small problem /* This is the straightforward way to compute J^T J, J^T e. However, due to * its noncontinuous memory access pattern, it incures many cache misses when * applied to large minimization problems (i.e. problems involving a large * number of free variables and measurements), in which J is too large to * fit in the L1 cache. For such problems, a cache-efficient blocking scheme * is preferable. * * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this * performance problem. * * On the other hand, the straightforward algorithm is faster on small * problems since in this case it avoids the overheads of blocking. */ for(i=0; i<m; ++i){ for(j=i; j<m; ++j){ int lm; for(l=0, tmp=0.0; l<n; ++l){ lm=l*m; tmp+=jac[lm+i]*jac[lm+j]; } jacTjac[i*m+j]=jacTjac[j*m+i]=tmp; } /* J^T e */ for(l=0, tmp=0.0; l<n; ++l) tmp+=jac[l*m+i]*e[l]; jacTe[i]=tmp; } } else{ // this is a large problem /* Cache efficient computation of J^T J based on blocking */ TRANS_MAT_MAT_MULT(jac, jacTjac, n, m); /* cache efficient computation of J^T e */ for(i=0; i<m; ++i) jacTe[i]=0.0; for(i=0; i<n; ++i){ register LM_REAL *jacrow; for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l) jacTe[l]+=jacrow[l]*tmp; } } /* Compute ||J^T e||_inf and ||p||^2 */ for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){ if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp; diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */ p_L2+=p[i]*p[i]; } //p_L2=sqrt(p_L2); } // call visualisation function if (visf) { if (visf(p, hx, m, n, k, p_eL2, adata) == 0) { stop = 7; break; } } #if 0 if(!(k%10)){ printf("Iter: %d, estimate: ", k); for(i=0; i<m; ++i) printf("%.9g ", p[i]); printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2); } #endif /* check for convergence */ if((jacTe_inf <= eps1)){ Dp_L2=0.0; /* no increment for p in this case */ stop=1; break; } /* compute initial damping factor */ if(k==0){ for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } /* determine increment using adaptive damping */ /* augment normal equations */ for(i=0; i<m; ++i) jacTjac[i*m+i]+=mu; /* solve augmented equations */ #ifdef HAVE_LAPACK /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD. * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate; * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); #else /* use the LU included with levmar */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); #endif /* HAVE_LAPACK */ if(issolved){ /* compute p's new estimate and ||Dp||^2 */ for(i=0, Dp_L2=0.0; i<m; ++i){ pDp[i]=p[i] + (tmp=Dp[i]); Dp_L2+=tmp*tmp; } //Dp_L2=sqrt(Dp_L2); if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ //if(Dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ stop=2; break; } if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */ //if(Dp_L2>=(p_L2+eps2)/CNST(EPSILON)){ /* almost singular */ stop=4; break; } (*func)(pDp, wrk, m, n, adata); ++nfev; /* evaluate function at p + Dp */ for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */ tmp=x[i]-wrk[i]; pDp_eL2+=tmp*tmp; } dF=p_eL2-pDp_eL2; if(updp || dF>0){ /* update jac */ for(i=0; i<n; ++i){ for(l=0, tmp=0.0; l<m; ++l) tmp+=jac[i*m+l]*Dp[l]; /* (J * Dp)[i] */ tmp=(wrk[i] - hx[i] - tmp)/Dp_L2; /* (f(p+dp)[i] - f(p)[i] - (J * Dp)[i])/(dp^T*dp) */ for(j=0; j<m; ++j) jac[i*m+j]+=tmp*Dp[j]; } ++updjac; newjac=1; } for(i=0, dL=0.0; i<m; ++i) dL+=Dp[i]*(mu*Dp[i]+jacTe[i]); if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ dF=(CNST(2.0)*dF/dL-CNST(1.0)); tmp=dF*dF*dF; tmp=CNST(1.0)-tmp*tmp*dF; mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) ); nu=2; for(i=0 ; i<m; ++i) /* update p's estimate */ p[i]=pDp[i]; for(i=0; i<n; ++i){ /* update e, hx and ||e||_2 */ e[i]=x[i]-wrk[i]; hx[i]=wrk[i]; } p_eL2=pDp_eL2; updp=1; continue; } } /* if this point is reached, either the linear system could not be solved or * the error did not reduce; in any case, the increment must be rejected */ mu*=nu; nu2=nu<<1; // 2*nu; if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */ stop=5; break; } nu=nu2; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; } if(k>=itmax) stop=3; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; if(info){ info[0]=init_p_eL2; info[1]=p_eL2; info[2]=jacTe_inf; info[3]=Dp_L2; for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i]; info[4]=mu/tmp; info[5]=(LM_REAL)k; info[6]=(LM_REAL)stop; info[7]=(LM_REAL)nfev; info[8]=(LM_REAL)njap; } /* covariance matrix */ if(covar){ LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n); } if(freework) free(work); if(wrk2) free(wrk2); return (stop!=4)? k : -1; }
extern "C" LEVMARDLL_API void StochFit(BoxReflSettings* InitStruct, double parameters[], double covararray[], int paramsize, double info[], double ParamArray[], double chisquarearray[], int* paramarraysize) { FastReflcalc Refl; Refl.init(InitStruct); double* Reflectivity = InitStruct->Refl; int QSize = InitStruct->QPoints; double* parampercs = InitStruct->ParamPercs; //Setup the fit double opts[LM_OPTS_SZ]; opts[0]=LM_INIT_MU; opts[1]=1E-15; opts[2]=1E-15; opts[3]=1E-20; opts[4]=-LM_DIFF_DELTA; // relevant only if the finite difference jacobian version is used //Allocate a dummy array - Our real calculation is done in Refl.objective double* xvec = new double[InitStruct->QPoints] ; for(int i = 0; i < InitStruct->QPoints; i++) { xvec[i] = 0; } //Copy starting solution double* origguess = new double[paramsize]; memcpy(origguess, parameters, sizeof(double)*paramsize); if(InitStruct->OneSigma) Refl.mkdensityonesigma(parameters, paramsize); else Refl.mkdensity(parameters, paramsize); Refl.myrfdispatch(); double bestchisquare = 0; for(int i = 0; i < InitStruct->QPoints; i++) { bestchisquare += (log(Refl.reflpt[i])-log(Reflectivity[i]))*(log(Refl.reflpt[i])-log(Reflectivity[i])); } double tempinfoarray[9]; tempinfoarray[1] = bestchisquare; double* tempcovararray = new double[paramsize*paramsize]; memset(tempcovararray,0.0, sizeof(double)*paramsize*paramsize); ParameterContainer original(parameters, tempcovararray, paramsize,InitStruct->OneSigma, tempinfoarray, parampercs[6]); delete[] tempcovararray; vector<ParameterContainer> temp; temp.reserve(6000); omp_set_num_threads(omp_get_num_procs()); #pragma omp parallel { FastReflcalc locRefl; locRefl.init(InitStruct); //Initialize random number generator int seed = time_seed(); CRandomMersenne randgen(time_seed()+omp_get_thread_num()); ParameterContainer localanswer; double locparameters[20]; double locbestchisquare = bestchisquare; double bestparam[20]; int vecsize = 1000; int veccount = 0; ParameterContainer* vec = (ParameterContainer*)malloc(vecsize*sizeof(ParameterContainer)); double locinfo[9]; //Allocate workspace - these will be private to each thread double* work, *covar; work=(double*)malloc((LM_DIF_WORKSZ(paramsize, QSize)+paramsize*QSize)*sizeof(double)); covar=work+LM_DIF_WORKSZ(paramsize, QSize); #pragma omp for schedule(runtime) for(int i = 0; i < InitStruct->Iterations;i++) { locparameters[0] = randgen.IRandom(origguess[0]*parampercs[4], origguess[0]*parampercs[5]); for(int k = 0; k< InitStruct->Boxes; k++) { if(InitStruct->OneSigma) { locparameters[2*k+1] = randgen.IRandom(origguess[2*k+1]*parampercs[0], origguess[2*k+1]*parampercs[1]); locparameters[2*k+2] = randgen.IRandom(origguess[2*k+2]*parampercs[2], origguess[2*k+2]*parampercs[3]); } else { locparameters[3*k+1] = randgen.IRandom(origguess[3*k+1]*parampercs[0], origguess[3*k+1]*parampercs[1]); locparameters[3*k+2] = randgen.IRandom(origguess[3*k+2]*parampercs[2], origguess[3*k+2]*parampercs[3]); locparameters[3*k+3] = randgen.IRandom(origguess[3*k+3]*parampercs[4], origguess[3*k+3]*parampercs[5]); } } locparameters[paramsize-1] = origguess[paramsize-1]; if(InitStruct->UL == NULL) dlevmar_dif(locRefl.objective, locparameters, xvec, paramsize, InitStruct->QPoints, 500, opts, locinfo, work,covar,(void*)(&locRefl)); else dlevmar_bc_dif(locRefl.objective, locparameters, xvec, paramsize, InitStruct->QPoints, InitStruct->LL, InitStruct->UL, 500, opts, locinfo, work,covar,(void*)(&locRefl)); localanswer.SetContainer(locparameters,covar,paramsize,InitStruct->OneSigma,locinfo, parampercs[6]); if(locinfo[1] < bestchisquare && localanswer.IsReasonable()) { //Resize the private arrays if we need the space if(veccount+2 == vecsize) { vecsize += 1000; vec = (ParameterContainer*)realloc(vec,vecsize*sizeof(ParameterContainer)); } bool unique = true; int arraysize = veccount; //Check if the answer already exists for(int i = 0; i < arraysize; i++) { if(localanswer == vec[i]) { unique = false; i = arraysize; } } //If the answer is unique add it to our set of answers if(unique) { vec[veccount] = localanswer; veccount++; } } } #pragma omp critical (AddVecs) { for(int i = 0; i < veccount; i++) { temp.push_back(vec[i]); } } free(vec); free(work); } // delete[] xvec; delete[] origguess; //Sort the answers //Get the total number of answers temp.push_back(original); vector<ParameterContainer> allsolutions; allsolutions.reserve(6000); int tempsize = temp.size(); allsolutions.push_back(temp[0]); for(int i = 1; i < tempsize; i++) { int allsolutionssize = allsolutions.size(); for(int j = 0; j < allsolutionssize;j++) { if(temp[i] == allsolutions[j]) { break; } if(j == allsolutionssize-1) { allsolutions.push_back(temp[i]); } } } if(allsolutions.size() > 0) { sort(allsolutions.begin(), allsolutions.end()); } for(int i = 0; i < allsolutions.size() && i < 1000 && allsolutions.size() > 0; i++) { for(int j = 0; j < paramsize; j++) { ParamArray[(i)*paramsize+j] = (allsolutions.at(i).GetParamArray())[j]; covararray[(i)*paramsize+j] = (allsolutions.at(i).GetCovarArray())[j]; } memcpy(info, allsolutions.at(i).GetInfoArray(), 9* sizeof(double)); info += 9; chisquarearray[i] = (allsolutions.at(i).GetScore()); } *paramarraysize = min(allsolutions.size(),999); }
/* Secant version of the LEVMAR_DER() function above: the Jacobian is approximated with * the aid of finite differences (forward or central, see the comment for the opts argument) */ int LEVMAR_DIF( void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in R^n */ LM_REAL *p, /* I/O: initial parameter estimates. On output has the estimated solution */ LM_REAL *x, /* I: measurement vector. NULL implies a zero vector */ int m, /* I: parameter vector dimension (i.e. #unknowns) */ int n, /* I: measurement vector dimension */ int itmax, /* I: maximum number of iterations */ LM_REAL opts[5], /* I: opts[0-4] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and * the step used in difference approximation to the Jacobian. Set to NULL for defaults to be used. * If \delta<0, the Jacobian is approximated with central differences which are more accurate * (but slower!) compared to the forward differences employed by default. */ LM_REAL info[LM_INFO_SZ], /* O: information regarding the minimization. Set to NULL if don't care * info[0]= ||e||_2 at initial p. * info[1-4]=[ ||e||_2, ||J^T e||_inf, ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p. * info[5]= # iterations, * info[6]=reason for terminating: 1 - stopped by small gradient J^T e * 2 - stopped by small Dp * 3 - stopped by itmax * 4 - singular matrix. Restart from current p with increased mu * 5 - no further error reduction is possible. Restart with increased mu * 6 - stopped by small ||e||_2 * 7 - stopped by invalid (i.e. NaN or Inf) "func" values. This is a user error * info[7]= # function evaluations * info[8]= # Jacobian evaluations * info[9]= # linear systems solved, i.e. # attempts for reducing error */ LM_REAL *work, /* working memory at least LM_DIF_WORKSZ() reals large, allocated if NULL */ LM_REAL *covar, /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */ void *adata) /* pointer to possibly additional data, passed uninterpreted to func. * Set to NULL if not needed */ { register int i, j, k, l; int worksz, freework=0, issolved; /* temp work arrays */ LM_REAL *e, /* nx1 */ *hx, /* \hat{x}_i, nx1 */ *jacTe, /* J^T e_i mx1 */ *jac, /* nxm */ *jacTjac, /* mxm */ *Dp, /* mx1 */ *diag_jacTjac, /* diagonal of J^T J, mx1 */ *pDp, /* p + Dp, mx1 */ *wrk, /* nx1 */ *wrk2; /* nx1, used only for holding a temporary e vector and when differentiating with central differences */ int using_ffdif=1; register LM_REAL mu, /* damping constant */ tmp; /* mainly used in matrix & vector multiplications */ LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */ LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL; LM_REAL tau, eps1, eps2, eps2_sq, eps3, delta; LM_REAL init_p_eL2; int nu, nu2, stop=0, nfev, njap=0, nlss=0, K=(m>=10)? m: 10, updjac, updp=1, newjac; const int nm=n*m; int (*linsolver)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)=NULL; mu=jacTe_inf=p_L2=0.0; /* -Wall */ updjac=newjac=0; /* -Wall */ if(n<m){ fprintf(stderr, LCAT(LEVMAR_DIF, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m); return LM_ERROR; } if(opts){ tau=opts[0]; eps1=opts[1]; eps2=opts[2]; eps2_sq=opts[2]*opts[2]; eps3=opts[3]; delta=opts[4]; if(delta<0.0){ delta=-delta; /* make positive */ using_ffdif=0; /* use central differencing */ } } else{ // use default values tau=LM_CNST(LM_INIT_MU); eps1=LM_CNST(LM_STOP_THRESH); eps2=LM_CNST(LM_STOP_THRESH); eps2_sq=LM_CNST(LM_STOP_THRESH)*LM_CNST(LM_STOP_THRESH); eps3=LM_CNST(LM_STOP_THRESH); delta=LM_CNST(LM_DIFF_DELTA); } if(!work){ worksz=LM_DIF_WORKSZ(m, n); //4*n+4*m + n*m + m*m; work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */ if(!work){ fprintf(stderr, LCAT(LEVMAR_DIF, "(): memory allocation request failed\n")); return LM_ERROR; } freework=1; } /* set up work arrays */ e=work; hx=e + n; jacTe=hx + n; jac=jacTe + m; jacTjac=jac + nm; Dp=jacTjac + m*m; diag_jacTjac=Dp + m; pDp=diag_jacTjac + m; wrk=pDp + m; wrk2=wrk + n; /* compute e=x - f(p) and its L2 norm */ (*func)(p, hx, m, n, adata); nfev=1; /* ### e=x-hx, p_eL2=||e|| */ #if 1 p_eL2=LEVMAR_L2NRMXMY(e, x, hx, n); #else for(i=0, p_eL2=0.0; i<n; ++i){ e[i]=tmp=x[i]-hx[i]; p_eL2+=tmp*tmp; } #endif init_p_eL2=p_eL2; if(!LM_FINITE(p_eL2)) stop=7; nu=20; /* force computation of J */ for(k=0; k<itmax && !stop; ++k){ /* Note that p and e have been updated at a previous iteration */ if(p_eL2<=eps3){ /* error is small */ stop=6; break; } /* Compute the Jacobian J at p, J^T J, J^T e, ||J^T e||_inf and ||p||^2. * The symmetry of J^T J is again exploited for speed */ if((updp && nu>16) || updjac==K){ /* compute difference approximation to J */ if(using_ffdif){ /* use forward differences */ LEVMAR_FDIF_FORW_JAC_APPROX(func, p, hx, wrk, delta, jac, m, n, adata); ++njap; nfev+=m; } else{ /* use central differences */ LEVMAR_FDIF_CENT_JAC_APPROX(func, p, wrk, wrk2, delta, jac, m, n, adata); ++njap; nfev+=2*m; } nu=2; updjac=0; updp=0; newjac=1; } if(newjac){ /* Jacobian has changed, recompute J^T J, J^t e, etc */ newjac=0; /* J^T J, J^T e */ if(nm<=__BLOCKSZ__SQ){ // this is a small problem /* J^T*J_ij = \sum_l J^T_il * J_lj = \sum_l J_li * J_lj. * Thus, the product J^T J can be computed using an outer loop for * l that adds J_li*J_lj to each element ij of the result. Note that * with this scheme, the accesses to J and JtJ are always along rows, * therefore induces less cache misses compared to the straightforward * algorithm for computing the product (i.e., l loop is innermost one). * A similar scheme applies to the computation of J^T e. * However, for large minimization problems (i.e., involving a large number * of unknowns and measurements) for which J/J^T J rows are too large to * fit in the L1 cache, even this scheme incures many cache misses. In * such cases, a cache-efficient blocking scheme is preferable. * * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this * performance problem. * * Note that the non-blocking algorithm is faster on small * problems since in this case it avoids the overheads of blocking. */ register int l; register LM_REAL alpha, *jaclm, *jacTjacim; /* looping downwards saves a few computations */ for(i=m*m; i-->0; ) jacTjac[i]=0.0; for(i=m; i-->0; ) jacTe[i]=0.0; for(l=n; l-->0; ){ jaclm=jac+l*m; for(i=m; i-->0; ){ jacTjacim=jacTjac+i*m; alpha=jaclm[i]; //jac[l*m+i]; for(j=i+1; j-->0; ) /* j<=i computes lower triangular part only */ jacTjacim[j]+=jaclm[j]*alpha; //jacTjac[i*m+j]+=jac[l*m+j]*alpha /* J^T e */ jacTe[i]+=alpha*e[l]; } } for(i=m; i-->0; ) /* copy to upper part */ for(j=i+1; j<m; ++j) jacTjac[i*m+j]=jacTjac[j*m+i]; } else{ // this is a large problem /* Cache efficient computation of J^T J based on blocking */ LEVMAR_TRANS_MAT_MAT_MULT(jac, jacTjac, n, m); /* cache efficient computation of J^T e */ for(i=0; i<m; ++i) jacTe[i]=0.0; for(i=0; i<n; ++i){ register LM_REAL *jacrow; for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l) jacTe[l]+=jacrow[l]*tmp; } } /* Compute ||J^T e||_inf and ||p||^2 */ for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){ if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp; diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */ p_L2+=p[i]*p[i]; } //p_L2=sqrt(p_L2); } #if 0 if(!(k%100)){ printf("Current estimate: "); for(i=0; i<m; ++i) printf("%.9g ", p[i]); printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2); } #endif /* check for convergence */ if((jacTe_inf <= eps1)){ Dp_L2=0.0; /* no increment for p in this case */ stop=1; break; } /* compute initial damping factor */ if(k==0){ for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */ mu=tau*tmp; } /* determine increment using adaptive damping */ /* augment normal equations */ for(i=0; i<m; ++i) jacTjac[i*m+i]+=mu; /* solve augmented equations */ #ifdef HAVE_LAPACK /* 7 alternatives are available: LU, Cholesky + Cholesky with PLASMA, LDLt, 2 variants of QR decomposition and SVD. * For matrices with dimensions of at least a few hundreds, the PLASMA implementation of Cholesky is the fastest. * From the serial solvers, Cholesky is the fastest but might occasionally be inapplicable due to numerical round-off; * QR is slower but more robust; SVD is the slowest but most robust; LU is quite robust but * slower than LDLt; LDLt offers a good tradeoff between robustness and speed */ issolved=AX_EQ_B_BK(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_BK; //issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU; //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_CHOL; #ifdef HAVE_PLASMA //issolved=AX_EQ_B_PLASMA_CHOL(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_PLASMA_CHOL; #endif //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_QR; //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m); ++nlss; linsolver=(int (*)(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m))AX_EQ_B_QRLS; //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_SVD; #else /* use the LU included with levmar */ issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m); ++nlss; linsolver=AX_EQ_B_LU; #endif /* HAVE_LAPACK */ if(issolved){ /* compute p's new estimate and ||Dp||^2 */ for(i=0, Dp_L2=0.0; i<m; ++i){ pDp[i]=p[i] + (tmp=Dp[i]); Dp_L2+=tmp*tmp; } //Dp_L2=sqrt(Dp_L2); if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */ //if(Dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */ stop=2; break; } if(Dp_L2>=(p_L2+eps2)/(LM_CNST(EPSILON)*LM_CNST(EPSILON))){ /* almost singular */ //if(Dp_L2>=(p_L2+eps2)/LM_CNST(EPSILON)){ /* almost singular */ stop=4; break; } (*func)(pDp, wrk, m, n, adata); ++nfev; /* evaluate function at p + Dp */ /* compute ||e(pDp)||_2 */ /* ### wrk2=x-wrk, pDp_eL2=||wrk2|| */ #if 1 pDp_eL2=LEVMAR_L2NRMXMY(wrk2, x, wrk, n); #else for(i=0, pDp_eL2=0.0; i<n; ++i){ wrk2[i]=tmp=x[i]-wrk[i]; pDp_eL2+=tmp*tmp; } #endif if(!LM_FINITE(pDp_eL2)){ /* sum of squares is not finite, most probably due to a user error. * This check makes sure that the loop terminates early in the case * of invalid input. Thanks to Steve Danauskas for suggesting it */ stop=7; break; } dF=p_eL2-pDp_eL2; if(updp || dF>0){ /* update jac */ for(i=0; i<n; ++i){ for(l=0, tmp=0.0; l<m; ++l) tmp+=jac[i*m+l]*Dp[l]; /* (J * Dp)[i] */ tmp=(wrk[i] - hx[i] - tmp)/Dp_L2; /* (f(p+dp)[i] - f(p)[i] - (J * Dp)[i])/(dp^T*dp) */ for(j=0; j<m; ++j) jac[i*m+j]+=tmp*Dp[j]; } ++updjac; newjac=1; } for(i=0, dL=0.0; i<m; ++i) dL+=Dp[i]*(mu*Dp[i]+jacTe[i]); if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */ tmp=(LM_CNST(2.0)*dF/dL-LM_CNST(1.0)); tmp=LM_CNST(1.0)-tmp*tmp*tmp; mu=mu*( (tmp>=LM_CNST(ONE_THIRD))? tmp : LM_CNST(ONE_THIRD) ); nu=2; for(i=0 ; i<m; ++i) /* update p's estimate */ p[i]=pDp[i]; for(i=0; i<n; ++i){ /* update e, hx and ||e||_2 */ e[i]=wrk2[i]; //x[i]-wrk[i]; hx[i]=wrk[i]; } p_eL2=pDp_eL2; updp=1; continue; } } /* if this point is reached, either the linear system could not be solved or * the error did not reduce; in any case, the increment must be rejected */ mu*=nu; nu2=nu<<1; // 2*nu; if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */ stop=5; break; } nu=nu2; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; } if(k>=itmax) stop=3; for(i=0; i<m; ++i) /* restore diagonal J^T J entries */ jacTjac[i*m+i]=diag_jacTjac[i]; if(info){ info[0]=init_p_eL2; info[1]=p_eL2; info[2]=jacTe_inf; info[3]=Dp_L2; for(i=0, tmp=LM_REAL_MIN; i<m; ++i) if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i]; info[4]=mu/tmp; info[5]=(LM_REAL)k; info[6]=(LM_REAL)stop; info[7]=(LM_REAL)nfev; info[8]=(LM_REAL)njap; info[9]=(LM_REAL)nlss; } /* covariance matrix */ if(covar){ LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n); } if(freework) free(work); #ifdef LINSOLVERS_RETAIN_MEMORY if(linsolver) (*linsolver)(NULL, NULL, NULL, 0); #endif return (stop!=4 && stop!=7)? k : LM_ERROR; }