void work_() { long t; asm("nop"); pmem_in[2] = 'A'; t = *(long*)0x200000018; output_char('t'); output_q_hex(t); output_char(C_n); output_char(*(char*)0x0000088000000002); output_q_hex(*(char*)0x0000088000000002); output_char(C_n); asm("nop"); asm("nop"); *(pmem_in+2) = 'A'; output_char('u'); output_char(*(char*)0x0000088000000002); output_q_hex(*(char*)0x0000088000000002); output_char(C_n); work0(); work1(); work2(); return; }
void work() { nac = nzero = 0; for (int i = 0; i < n; ++i) { int x, y; scanf("%d%d", &x, &y); pt[i] = {x, y, 0}; } for (int i = 0; i < n; ++i) { std::memcpy(sorted, pt, n * sizeof(sorted[0])); std::swap(sorted[n - 1], sorted[i]); base = sorted[n - 1]; for (int i = 0; i < n; ++i) sorted[i].angle = getangle(sorted[i] - base); std::sort(sorted, sorted + n - 1, [](const Point& a, const Point& b){ //return cross(a - base, b - base) < 0; //return getangle(a - base) < getangle(b - base); return a.angle < b.angle; }); //std::cout << "BASE " << base.x << ' ' << base.y << std::endl; //for (int j = 0; j < n - 1; ++j) //std::cout << j << ' ' << sorted[j].x << ' ' << sorted[j].y << std::endl; work2(); } long long ttl = (long long) n * (n - 1) * (n - 2) / 2; long long fans = ttl - nac; //std::cout << "RESULT " << nac << ' ' << nzero << ' ' << ttl << std::endl; std::cout << ttl / 3 - fans << std::endl; }
void a40 (pair * p) { #pragma omp parallel sections { #pragma omp section incr_pair (p, work1 (), work2 ()); #pragma omp section incr_b (p, work3 ()); } }
void foo(int q) { int p = 2; #pragma omp parallel firstprivate(q, p) work1(p, q); // IPCP: call void @work1(i32 2, i32 %{{[._a-zA-Z0-9]*}}) #pragma omp parallel for firstprivate(p, q) for (int i = 0; i < q; i++) work2(i, p); // IPCP: call void @work2(i32 %{{[._a-zA-Z0-9]*}}, i32 2) #pragma omp target teams firstprivate(p) work12(p, p); // IPCP: call void @work12(i32 2, i32 2) }
TEST(fit_ellipse, test1) { int num_points = 7, status = 0; double maj_d = 0.0, min_d = 0.0, pa_d = 0.0; float maj_f = 0.0, min_f = 0.0, pa_f = 0.0; // Test double precision. { std::vector<double> x(num_points), y(num_points); std::vector<double> work1(5 * num_points), work2(5 * num_points); x[0] = -0.1686; x[1] = -0.0921; x[2] = 0.0765; x[3] = 0.1686; x[4] = 0.0921; x[5] = -0.0765; x[6] = -0.1686; y[0] = 0.7282; y[1] = 0.6994; y[2] = 0.6675; y[3] = 0.6643; y[4] = 0.7088; y[5] = 0.7407; y[6] = 0.7282; oskar_fit_ellipse_d(&maj_d, &min_d, &pa_d, num_points, &x[0], &y[0], &work1[0], &work2[0], &status); EXPECT_EQ(0, status) << oskar_get_error_string(status); } // Test single precision. { std::vector<float> x(num_points), y(num_points); std::vector<float> work1(5 * num_points), work2(5 * num_points); x[0] = -0.1686; x[1] = -0.0921; x[2] = 0.0765; x[3] = 0.1686; x[4] = 0.0921; x[5] = -0.0765; x[6] = -0.1686; y[0] = 0.7282; y[1] = 0.6994; y[2] = 0.6675; y[3] = 0.6643; y[4] = 0.7088; y[5] = 0.7407; y[6] = 0.7282; oskar_fit_ellipse_f(&maj_f, &min_f, &pa_f, num_points, &x[0], &y[0], &work1[0], &work2[0], &status); EXPECT_EQ(0, status) << oskar_get_error_string(status); } // Compare results. EXPECT_NEAR(maj_d, 0.3608619735, 1e-9); EXPECT_NEAR(min_d, 0.0494223702, 1e-9); EXPECT_NEAR(pa_d, -1.3865537748, 1e-9); EXPECT_NEAR(maj_d, maj_f, 1e-5); EXPECT_NEAR(min_d, min_f, 1e-5); EXPECT_NEAR(pa_d, pa_f, 1e-5); }
/************************************************************************* Internal working subroutine for bidiagonal decomposition *************************************************************************/ bool bidiagonalsvddecompositioninternal(ap::real_1d_array& d, ap::real_1d_array e, int n, bool isupper, bool isfractionalaccuracyrequired, ap::real_2d_array& u, int ustart, int nru, ap::real_2d_array& c, int cstart, int ncc, ap::real_2d_array& vt, int vstart, int ncvt) { bool result; int i; int idir; int isub; int iter; int j; int ll = 0; // Eliminate compiler warning. int lll; int m; int maxit; int oldll; int oldm; double abse; double abss; double cosl; double cosr; double cs; double eps; double f; double g; double h; double mu; double oldcs; double oldsn = 0.; // Eliminate compiler warning. double r; double shift; double sigmn; double sigmx; double sinl; double sinr; double sll; double smax; double smin; double sminl; double sminoa; double sn; double thresh; double tol; double tolmul; double unfl; ap::real_1d_array work0; ap::real_1d_array work1; ap::real_1d_array work2; ap::real_1d_array work3; int maxitr; bool matrixsplitflag; bool iterflag; ap::real_1d_array utemp; ap::real_1d_array vttemp; ap::real_1d_array ctemp; ap::real_1d_array etemp; bool fwddir; double tmp; int mm1; int mm0; bool bchangedir; int uend; int cend; int vend; result = true; if( n==0 ) { return result; } if( n==1 ) { if( d(1)<0 ) { d(1) = -d(1); if( ncvt>0 ) { ap::vmul(&vt(vstart, vstart), ap::vlen(vstart,vstart+ncvt-1), -1); } } return result; } // // init // work0.setbounds(1, n-1); work1.setbounds(1, n-1); work2.setbounds(1, n-1); work3.setbounds(1, n-1); uend = ustart+ap::maxint(nru-1, 0); vend = vstart+ap::maxint(ncvt-1, 0); cend = cstart+ap::maxint(ncc-1, 0); utemp.setbounds(ustart, uend); vttemp.setbounds(vstart, vend); ctemp.setbounds(cstart, cend); maxitr = 12; fwddir = true; // // resize E from N-1 to N // etemp.setbounds(1, n); for(i = 1; i <= n-1; i++) { etemp(i) = e(i); } e.setbounds(1, n); for(i = 1; i <= n-1; i++) { e(i) = etemp(i); } e(n) = 0; idir = 0; // // Get machine constants // eps = ap::machineepsilon; unfl = ap::minrealnumber; // // If matrix lower bidiagonal, rotate to be upper bidiagonal // by applying Givens rotations on the left // if( !isupper ) { for(i = 1; i <= n-1; i++) { generaterotation(d(i), e(i), cs, sn, r); d(i) = r; e(i) = sn*d(i+1); d(i+1) = cs*d(i+1); work0(i) = cs; work1(i) = sn; } // // Update singular vectors if desired // if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp); } } // // Compute singular values to relative accuracy TOL // (By setting TOL to be negative, algorithm will compute // singular values to absolute accuracy ABS(TOL)*norm(input matrix)) // tolmul = ap::maxreal(double(10), ap::minreal(double(100), pow(eps, -0.125))); tol = tolmul*eps; if( !isfractionalaccuracyrequired ) { tol = -tol; } // // Compute approximate maximum, minimum singular values // smax = 0; for(i = 1; i <= n; i++) { smax = ap::maxreal(smax, fabs(d(i))); } for(i = 1; i <= n-1; i++) { smax = ap::maxreal(smax, fabs(e(i))); } sminl = 0; if( tol>=0 ) { // // Relative accuracy desired // sminoa = fabs(d(1)); if( sminoa!=0 ) { mu = sminoa; for(i = 2; i <= n; i++) { mu = fabs(d(i))*(mu/(mu+fabs(e(i-1)))); sminoa = ap::minreal(sminoa, mu); if( sminoa==0 ) { break; } } } sminoa = sminoa/sqrt(double(n)); thresh = ap::maxreal(tol*sminoa, maxitr*n*n*unfl); } else { // // Absolute accuracy desired // thresh = ap::maxreal(fabs(tol)*smax, maxitr*n*n*unfl); } // // Prepare for main iteration loop for the singular values // (MAXIT is the maximum number of passes through the inner // loop permitted before nonconvergence signalled.) // maxit = maxitr*n*n; iter = 0; oldll = -1; oldm = -1; // // M points to last element of unconverged part of matrix // m = n; // // Begin main iteration loop // while(true) { // // Check for convergence or exceeding iteration count // if( m<=1 ) { break; } if( iter>maxit ) { result = false; return result; } // // Find diagonal block of matrix to work on // if( tol<0&&fabs(d(m))<=thresh ) { d(m) = 0; } smax = fabs(d(m)); smin = smax; matrixsplitflag = false; for(lll = 1; lll <= m-1; lll++) { ll = m-lll; abss = fabs(d(ll)); abse = fabs(e(ll)); if( tol<0&&abss<=thresh ) { d(ll) = 0; } if( abse<=thresh ) { matrixsplitflag = true; break; } smin = ap::minreal(smin, abss); smax = ap::maxreal(smax, ap::maxreal(abss, abse)); } if( !matrixsplitflag ) { ll = 0; } else { // // Matrix splits since E(LL) = 0 // e(ll) = 0; if( ll==m-1 ) { // // Convergence of bottom singular value, return to top of loop // m = m-1; continue; } } ll = ll+1; // // E(LL) through E(M-1) are nonzero, E(LL-1) is zero // if( ll==m-1 ) { // // 2 by 2 block, handle separately // svdv2x2(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl); d(m-1) = sigmx; e(m-1) = 0; d(m) = sigmn; // // Compute singular vectors, if desired // if( ncvt>0 ) { mm0 = m+(vstart-1); mm1 = m-1+(vstart-1); ap::vmove(&vttemp(vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), cosr); ap::vadd(&vttemp(vstart), &vt(mm0, vstart), ap::vlen(vstart,vend), sinr); ap::vmul(&vt(mm0, vstart), ap::vlen(vstart,vend), cosr); ap::vsub(&vt(mm0, vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), sinr); ap::vmove(&vt(mm1, vstart), &vttemp(vstart), ap::vlen(vstart,vend)); } if( nru>0 ) { mm0 = m+ustart-1; mm1 = m-1+ustart-1; ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl); ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl); ap::vmul(u.getcolumn(mm0, ustart, uend), cosl); ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl); ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend)); } if( ncc>0 ) { mm0 = m+cstart-1; mm1 = m-1+cstart-1; ap::vmove(&ctemp(cstart), &c(mm1, cstart), ap::vlen(cstart,cend), cosl); ap::vadd(&ctemp(cstart), &c(mm0, cstart), ap::vlen(cstart,cend), sinl); ap::vmul(&c(mm0, cstart), ap::vlen(cstart,cend), cosl); ap::vsub(&c(mm0, cstart), &c(mm1, cstart), ap::vlen(cstart,cend), sinl); ap::vmove(&c(mm1, cstart), &ctemp(cstart), ap::vlen(cstart,cend)); } m = m-2; continue; } // // If working on new submatrix, choose shift direction // (from larger end diagonal element towards smaller) // // Previously was // "if (LL>OLDM) or (M<OLDLL) then" // fixed thanks to Michael Rolle < *****@*****.** > // Very strange that LAPACK still contains it. // bchangedir = false; if( idir==1&&fabs(d(ll))<1.0E-3*fabs(d(m)) ) { bchangedir = true; } if( idir==2&&fabs(d(m))<1.0E-3*fabs(d(ll)) ) { bchangedir = true; } if( ll!=oldll||m!=oldm||bchangedir ) { if( fabs(d(ll))>=fabs(d(m)) ) { // // Chase bulge from top (big end) to bottom (small end) // idir = 1; } else { // // Chase bulge from bottom (big end) to top (small end) // idir = 2; } } // // Apply convergence tests // if( idir==1 ) { // // Run convergence test in forward direction // First apply standard test to bottom of matrix // if( (fabs(e(m-1))<=fabs(tol)*fabs(d(m)))||(tol<0&&fabs(e(m-1))<=thresh) ) { e(m-1) = 0; continue; } if( tol>=0 ) { // // If relative accuracy desired, // apply convergence criterion forward // mu = fabs(d(ll)); sminl = mu; iterflag = false; for(lll = ll; lll <= m-1; lll++) { if( fabs(e(lll))<=tol*mu ) { e(lll) = 0; iterflag = true; break; } mu = fabs(d(lll+1))*(mu/(mu+fabs(e(lll)))); sminl = ap::minreal(sminl, mu); } if( iterflag ) { continue; } } } else { // // Run convergence test in backward direction // First apply standard test to top of matrix // if( (fabs(e(ll))<=fabs(tol)*fabs(d(ll)))||(tol<0&&fabs(e(ll))<=thresh) ) { e(ll) = 0; continue; } if( tol>=0 ) { // // If relative accuracy desired, // apply convergence criterion backward // mu = fabs(d(m)); sminl = mu; iterflag = false; for(lll = m-1; lll >= ll; lll--) { if( fabs(e(lll))<=tol*mu ) { e(lll) = 0; iterflag = true; break; } mu = fabs(d(lll))*(mu/(mu+fabs(e(lll)))); sminl = ap::minreal(sminl, mu); } if( iterflag ) { continue; } } } oldll = ll; oldm = m; // // Compute shift. First, test if shifting would ruin relative // accuracy, and if so set the shift to zero. // if( tol>=0&&n*tol*(sminl/smax)<=ap::maxreal(eps, 0.01*tol) ) { // // Use a zero shift to avoid loss of relative accuracy // shift = 0; } else { // // Compute the shift from 2-by-2 block at end of matrix // if( idir==1 ) { sll = fabs(d(ll)); svd2x2(d(m-1), e(m-1), d(m), shift, r); } else { sll = fabs(d(m)); svd2x2(d(ll), e(ll), d(ll+1), shift, r); } // // Test if shift negligible, and if so set to zero // if( sll>0 ) { if( ap::sqr(shift/sll)<eps ) { shift = 0; } } } // // Increment iteration count // iter = iter+m-ll; // // If SHIFT = 0, do simplified QR iteration // if( shift==0 ) { if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i = ll; i <= m-1; i++) { generaterotation(d(i)*cs, e(i), cs, sn, r); if( i>ll ) { e(i-1) = oldsn*r; } generaterotation(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp); d(i) = tmp; work0(i-ll+1) = cs; work1(i-ll+1) = sn; work2(i-ll+1) = oldcs; work3(i-ll+1) = oldsn; } h = d(m)*cs; d(m) = h*oldcs; e(m-1) = h*oldsn; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( fabs(e(m-1))<=thresh ) { e(m-1) = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // cs = 1; oldcs = 1; for(i = m; i >= ll+1; i--) { generaterotation(d(i)*cs, e(i-1), cs, sn, r); if( i<m ) { e(i) = oldsn*r; } generaterotation(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp); d(i) = tmp; work0(i-ll) = cs; work1(i-ll) = -sn; work2(i-ll) = oldcs; work3(i-ll) = -oldsn; } h = d(ll)*cs; d(ll) = h*oldcs; e(ll) = h*oldsn; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } // // Test convergence // if( fabs(e(ll))<=thresh ) { e(ll) = 0; } } } else { // // Use nonzero shift // if( idir==1 ) { // // Chase bulge from top to bottom // Save cosines and sines for later singular vector updates // f = (fabs(d(ll))-shift)*(extsignbdsqr(double(1), d(ll))+shift/d(ll)); g = e(ll); for(i = ll; i <= m-1; i++) { generaterotation(f, g, cosr, sinr, r); if( i>ll ) { e(i-1) = r; } f = cosr*d(i)+sinr*e(i); e(i) = cosr*e(i)-sinr*d(i); g = sinr*d(i+1); d(i+1) = cosr*d(i+1); generaterotation(f, g, cosl, sinl, r); d(i) = r; f = cosl*e(i)+sinl*d(i+1); d(i+1) = cosl*d(i+1)-sinl*e(i); if( i<m-1 ) { g = sinl*e(i+1); e(i+1) = cosl*e(i+1); } work0(i-ll+1) = cosr; work1(i-ll+1) = sinr; work2(i-ll+1) = cosl; work3(i-ll+1) = sinl; } e(m-1) = f; // // Update singular vectors // if( ncvt>0 ) { applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp); } // // Test convergence // if( fabs(e(m-1))<=thresh ) { e(m-1) = 0; } } else { // // Chase bulge from bottom to top // Save cosines and sines for later singular vector updates // f = (fabs(d(m))-shift)*(extsignbdsqr(double(1), d(m))+shift/d(m)); g = e(m-1); for(i = m; i >= ll+1; i--) { generaterotation(f, g, cosr, sinr, r); if( i<m ) { e(i) = r; } f = cosr*d(i)+sinr*e(i-1); e(i-1) = cosr*e(i-1)-sinr*d(i); g = sinr*d(i-1); d(i-1) = cosr*d(i-1); generaterotation(f, g, cosl, sinl, r); d(i) = r; f = cosl*e(i-1)+sinl*d(i-1); d(i-1) = cosl*d(i-1)-sinl*e(i-1); if( i>ll+1 ) { g = sinl*e(i-2); e(i-2) = cosl*e(i-2); } work0(i-ll) = cosr; work1(i-ll) = -sinr; work2(i-ll) = cosl; work3(i-ll) = -sinl; } e(ll) = f; // // Test convergence // if( fabs(e(ll))<=thresh ) { e(ll) = 0; } // // Update singular vectors if desired // if( ncvt>0 ) { applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp); } if( nru>0 ) { applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp); } if( ncc>0 ) { applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp); } } } // // QR iteration finished, go back and check convergence // continue; } // // All singular values converged, so make them positive // for(i = 1; i <= n; i++) { if( d(i)<0 ) { d(i) = -d(i); // // Change sign of singular vectors, if desired // if( ncvt>0 ) { ap::vmul(&vt(i+vstart-1, vstart), ap::vlen(vstart,vend), -1); } } } // // Sort the singular values into decreasing order (insertion sort on // singular values, but only one transposition per singular vector) // for(i = 1; i <= n-1; i++) { // // Scan for smallest D(I) // isub = 1; smin = d(1); for(j = 2; j <= n+1-i; j++) { if( d(j)<=smin ) { isub = j; smin = d(j); } } if( isub!=n+1-i ) { // // Swap singular values and vectors // d(isub) = d(n+1-i); d(n+1-i) = smin; if( ncvt>0 ) { j = n+1-i; ap::vmove(&vttemp(vstart), &vt(isub+vstart-1, vstart), ap::vlen(vstart,vend)); ap::vmove(&vt(isub+vstart-1, vstart), &vt(j+vstart-1, vstart), ap::vlen(vstart,vend)); ap::vmove(&vt(j+vstart-1, vstart), &vttemp(vstart), ap::vlen(vstart,vend)); } if( nru>0 ) { j = n+1-i; ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend)); ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend)); ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend)); } if( ncc>0 ) { j = n+1-i; ap::vmove(&ctemp(cstart), &c(isub+cstart-1, cstart), ap::vlen(cstart,cend)); ap::vmove(&c(isub+cstart-1, cstart), &c(j+cstart-1, cstart), ap::vlen(cstart,cend)); ap::vmove(&c(j+cstart-1, cstart), &ctemp(cstart), ap::vlen(cstart,cend)); } } } return result; }
void internalldltrcond(const ap::real_2d_array& l, const ap::integer_1d_array& pivots, int n, bool isupper, bool isnormprovided, double anorm, double& rcond) { int i; int kase; int k; int km1; int km2; int kp1; int kp2; double ainvnm; ap::real_1d_array work0; ap::real_1d_array work1; ap::real_1d_array work2; ap::integer_1d_array iwork; double v; ap::ap_error::make_assertion(n>=0, ""); // // Check that the diagonal matrix D is nonsingular. // rcond = 0; if( isupper ) { for(i = n; i >= 1; i--) { if( pivots(i)>0&&ap::fp_eq(l(i,i),0) ) { return; } } } else { for(i = 1; i <= n; i++) { if( pivots(i)>0&&ap::fp_eq(l(i,i),0) ) { return; } } } // // Estimate the norm of A. // if( !isnormprovided ) { kase = 0; anorm = 0; while(true) { iterativeestimate1norm(n, work1, work0, iwork, anorm, kase); if( kase==0 ) { break; } if( isupper ) { // // Multiply by U' // k = n; while(k>=1) { if( pivots(k)>0 ) { // // P(k) // v = work0(k); work0(k) = work0(pivots(k)); work0(pivots(k)) = v; // // U(k) // km1 = k-1; v = ap::vdotproduct(&work0(1), 1, &l(1, k), l.getstride(), ap::vlen(1,km1)); work0(k) = work0(k)+v; // // Next k // k = k-1; } else { // // P(k) // v = work0(k-1); work0(k-1) = work0(-pivots(k-1)); work0(-pivots(k-1)) = v; // // U(k) // km1 = k-1; km2 = k-2; v = ap::vdotproduct(&work0(1), 1, &l(1, km1), l.getstride(), ap::vlen(1,km2)); work0(km1) = work0(km1)+v; v = ap::vdotproduct(&work0(1), 1, &l(1, k), l.getstride(), ap::vlen(1,km2)); work0(k) = work0(k)+v; // // Next k // k = k-2; } } // // Multiply by D // k = n; while(k>=1) { if( pivots(k)>0 ) { work0(k) = work0(k)*l(k,k); k = k-1; } else { v = work0(k-1); work0(k-1) = l(k-1,k-1)*work0(k-1)+l(k-1,k)*work0(k); work0(k) = l(k-1,k)*v+l(k,k)*work0(k); k = k-2; } } // // Multiply by U // k = 1; while(k<=n) { if( pivots(k)>0 ) { // // U(k) // km1 = k-1; v = work0(k); ap::vadd(&work0(1), 1, &l(1, k), l.getstride(), ap::vlen(1,km1), v); // // P(k) // v = work0(k); work0(k) = work0(pivots(k)); work0(pivots(k)) = v; // // Next k // k = k+1; } else { // // U(k) // km1 = k-1; kp1 = k+1; v = work0(k); ap::vadd(&work0(1), 1, &l(1, k), l.getstride(), ap::vlen(1,km1), v); v = work0(kp1); ap::vadd(&work0(1), 1, &l(1, kp1), l.getstride(), ap::vlen(1,km1), v); // // P(k) // v = work0(k); work0(k) = work0(-pivots(k)); work0(-pivots(k)) = v; // // Next k // k = k+2; } } } else { // // Multiply by L' // k = 1; while(k<=n) { if( pivots(k)>0 ) { // // P(k) // v = work0(k); work0(k) = work0(pivots(k)); work0(pivots(k)) = v; // // L(k) // kp1 = k+1; v = ap::vdotproduct(&work0(kp1), 1, &l(kp1, k), l.getstride(), ap::vlen(kp1,n)); work0(k) = work0(k)+v; // // Next k // k = k+1; } else { // // P(k) // v = work0(k+1); work0(k+1) = work0(-pivots(k+1)); work0(-pivots(k+1)) = v; // // L(k) // kp1 = k+1; kp2 = k+2; v = ap::vdotproduct(&work0(kp2), 1, &l(kp2, k), l.getstride(), ap::vlen(kp2,n)); work0(k) = work0(k)+v; v = ap::vdotproduct(&work0(kp2), 1, &l(kp2, kp1), l.getstride(), ap::vlen(kp2,n)); work0(kp1) = work0(kp1)+v; // // Next k // k = k+2; } } // // Multiply by D // k = n; while(k>=1) { if( pivots(k)>0 ) { work0(k) = work0(k)*l(k,k); k = k-1; } else { v = work0(k-1); work0(k-1) = l(k-1,k-1)*work0(k-1)+l(k,k-1)*work0(k); work0(k) = l(k,k-1)*v+l(k,k)*work0(k); k = k-2; } } // // Multiply by L // k = n; while(k>=1) { if( pivots(k)>0 ) { // // L(k) // kp1 = k+1; v = work0(k); ap::vadd(&work0(kp1), 1, &l(kp1, k), l.getstride(), ap::vlen(kp1,n), v); // // P(k) // v = work0(k); work0(k) = work0(pivots(k)); work0(pivots(k)) = v; // // Next k // k = k-1; } else { // // L(k) // kp1 = k+1; km1 = k-1; v = work0(k); ap::vadd(&work0(kp1), 1, &l(kp1, k), l.getstride(), ap::vlen(kp1,n), v); v = work0(km1); ap::vadd(&work0(kp1), 1, &l(kp1, km1), l.getstride(), ap::vlen(kp1,n), v); // // P(k) // v = work0(k); work0(k) = work0(-pivots(k)); work0(-pivots(k)) = v; // // Next k // k = k-2; } } } } } // // Quick return if possible // rcond = 0; if( n==0 ) { rcond = 1; return; } if( ap::fp_eq(anorm,0) ) { return; } // // Estimate the 1-norm of inv(A). // kase = 0; while(true) { iterativeestimate1norm(n, work1, work0, iwork, ainvnm, kase); if( kase==0 ) { break; } solvesystemldlt(l, pivots, work0, n, isupper, work2); ap::vmove(&work0(1), 1, &work2(1), 1, ap::vlen(1,n)); } // // Compute the estimate of the reciprocal condition number. // if( ap::fp_neq(ainvnm,0) ) { v = 1/ainvnm; rcond = v/anorm; } }
/** Perform curve fitting via Levenberg-Marquardt method with optional bounds. * \param fxnIn Function to fit. * \param Xvals_ target X values (ordinates, M) * \param Yvals_ target Y values (coordinates, M) * \param ParamVec input parameters(N); at finish contains best esimate of fit parameters * \param boundsIn true if parameter has bounds (N) * \param lboundIn contain lower bounds for parameter (N) * \param uboundIn contain upper bounds for parameter (N) * \param weightsIn contain weights for Y values (M) * \param tolerance Fit tolerance * \param maxIterations Number of iterations to try. */ int CurveFit::LevenbergMarquardt(FitFunctionType fxnIn, Darray const& Xvals_, Darray const& Yvals_, Darray& ParamVec, std::vector<bool> const& boundsIn, Darray const& lboundIn, Darray const& uboundIn, Darray const& weightsIn, double tolerance, int maxIterations) { int info = 0; hasBounds_ = boundsIn; Ubound_ = uboundIn; Lbound_ = lboundIn; Weights_ = weightsIn; if (ParametersHaveProblems(Xvals_, Yvals_, ParamVec)) { DBGPRINT("Error: %s\n", errorMessage_); return info; } // Set initial parameters finalY_ = Yvals_; Params_= ParamVec; // Internal parameter vector fParms_ = ParamVec; // Parameters for function evaluation Pvec_to_Params( ParamVec ); fxn_ = fxnIn; m_ = Xvals_.size(); // Number of values (rows) n_ = Params_.size(); // Number of parameters (cols) Darray residual_( m_ ); // Contain Y vals at current Params minus original Y // Jacobian matrix/R: m rows by n cols, transposed. jacobian_.assign( m_ * n_, 0.0 ); // For holding diagonal elements for scaling // TODO: Rename Darray diag( n_, 0.0 ); // Workspace arrays Darray work1( n_, 0.0 ); Darray work2( n_, 0.0 ); Darray newResidual( m_, 0.0 ); // delta specifies an upper bound on the euclidean norm of d*x double delta = 0.0; // factor is positive input variable used in determining the initial step // bound. This bound is set to the product of factor and the euclidean norm // of diag*x if nonzero, or else to factor itself. In most cases factor should // lie in the interval (.1,100.). 100. is a generally recommended value. // TODO: Make input parameter const double factor = 100.0; // gtol is a nonnegative input variable. Termination occurs when the cosine // of the angle between residual and any column of the Jacobian is at most // gtol in absolute value. Therefore, gtol measures the orthogonality // desired between the function vector and the columns of the Jacobian. // TODO: Make input parameter const double gtol = 0.0; // Smallest possible magnitude // TODO: Make Constant? const double dwarf = DBL_MIN; // Levenberg-Marquard parameter double LM_par = 0.0; // xtol is a nonnegative input variable. Termination occurs when the // relative error between two consecutive iterates is at most xtol. // Therefore, xtol measures the relative error desired in the // approximate solution. // TODO: Make input paramter double xtol = 0.0; // maxfev is the maxiumum number of allowed function evaluations. // NOTE: Included here for complete compatibility with eariler code, // though may not be strictly necessary. dsize maxfev = (n_ + 1) * (dsize)maxIterations; // Evaluate initial function, obtain residual EvaluateFxn( Xvals_, Yvals_, Params_, residual_ ); // Calculate norm of the residual double rnorm = VecNorm( residual_ ); DBGPRINT("Rnorm= %g\n", rnorm); dsize nfev = 1; // Will be set to calls to fxn_. 1 already. // MAIN LOOP int currentIt = 0; while ( currentIt < maxIterations ) { DBGPRINT("DEBUG: ----- Iteration %i ------------------------------\n", currentIt+1); // Calculate the Jacobian using the forward-difference approximation. CalcJacobian_ForwardDiff( Xvals_, Yvals_, Params_, residual_, newResidual ); PrintMatrix( "Jacobian", n_, m_, jacobian_ ); nfev += n_; // ------------------------------------------- // | BEGIN QRFAC // ------------------------------------------- // Perform QR factorization of Jacobian via Householder transformations // with column pivoting: // J * P = Q * R // where J is the Jacobian, P is the permutation matrix, Q is an orthogonal // matrix, and R is an upper trapezoidal matrix with diagonal elements of // nonincreasing magnitude. The Householder transformation for column k, // k = 1,2,...,min(m,n), is of the form: // I = (1/u[k])*u*ut // where u has zeros in the first k-1 positions. Adapted from routine qrfac_ // in Grace 5.1.22 lmdif.c, which in turn is derived from an earlier linpack // subroutine. DBGPRINT("\nQRFAC ITERATION %i\n", currentIt+1); // Rdiag will hold the diagonal elements of R. Darray Rdiag(n_, 0.0); // Jpvt defines the permutation matrix p such that a*p = q*r. Column j of p // is column Jpvt[j] of the identity matrix. Iarray Jpvt(n_, 0.0); // JcolNorm Will hold the norms of the columns of input Jacobian. Darray JcolNorm_(n_, 0.0); // Compute the initial column norms and initialize arrays. for (dsize in = 0; in != n_; in++) { JcolNorm_[in] = VecNorm( jacobian_.begin() + (in * m_), m_ ); Rdiag[in] = JcolNorm_[in]; work1[in] = Rdiag[in]; Jpvt[in] = in; DBGPRINT("%lu: Rdiag= %12.6g wa= %12.6g ipvt= %i\n", in+1, Rdiag[in], work1[in], Jpvt[in]+1); } // Reduce Jacobian to R with Householder transformations. dsize min_m_n = std::min( m_, n_ ); for (dsize in = 0; in != min_m_n; in++) { // Bring the column of largest norm into the pivot position. dsize kmax = in; for (dsize k = in; k != n_; k++) { DBGPRINT("\tRdiag[%lu]= %g Rdiag[%lu]= %g\n", k+1, Rdiag[k], kmax+1, Rdiag[kmax]); if (Rdiag[k] > Rdiag[kmax]) kmax = k; } DBGPRINT("Elt= %lu Kmax= %lu\n", in+1, kmax+1); if (kmax != in) { for (dsize i = 0; i != m_; i++) { double temp = jacobian_[i + in * m_]; jacobian_[i + in * m_] = jacobian_[i + kmax * m_]; jacobian_[i + kmax * m_] = temp; DBGPRINT("DBG: Swap jac[%lu,%lu] with jac[%lu,%lu]\n", i+1, in+1, i+1, kmax+1); } Rdiag[kmax] = Rdiag[in]; work1[kmax] = work1[in]; dsize k = Jpvt[in]; Jpvt[in] = Jpvt[kmax]; Jpvt[kmax] = k; } // Compute the Householder transformation to reduce the j-th column // of Jacobian to a multiple of the j-th unit vector. double ajnorm = VecNorm( jacobian_.begin() + (in + in * m_), m_ - in ); DBGPRINT("#Elt= %lu mat[%lu]= %g ajnorm= %g\n", m_ - in, in + in * m_, jacobian_[in + in * m_], ajnorm); if (ajnorm != 0.0) { if (jacobian_[in + in * m_] < 0.0) ajnorm = -ajnorm; for (dsize im = in; im < m_; im++) jacobian_[im + in * m_] /= ajnorm; jacobian_[in + in * m_] += 1.0; // Apply the transfomation to the remaining columns and update the norms dsize in1 = in + 1; if (in1 < n_) { for (dsize k = in1; k < n_; k++) { double sum = 0.0; for (dsize i = in; i < m_; i++) sum += jacobian_[i + in * m_] * jacobian_[i + k * m_]; double temp = sum / jacobian_[in + in * m_]; for (dsize i = in; i < m_; i++) jacobian_[i + k * m_] -= temp * jacobian_[i + in * m_]; if (Rdiag[k] != 0.0) { temp = jacobian_[in + k * m_] / Rdiag[k]; Rdiag[k] *= sqrt( std::max( 0.0, 1.0 - temp * temp ) ); temp = Rdiag[k] / work1[k]; DBGPRINT("\t\tQRFAC TEST: 0.5 * %g^2 <= %g\n", temp, machine_epsilon); if (0.05 * (temp * temp) <= machine_epsilon) { DBGPRINT("\t\tTEST PASSED\n"); Rdiag[k] = VecNorm( jacobian_.begin() + (in1 + k * m_), m_ - in - 1 ); work1[k] = Rdiag[k]; } } DBGPRINT("QRFAC Rdiag[%lu]= %g\n", k+1, Rdiag[k]); } } } Rdiag[in] = -ajnorm; } // ------------------------------------------- // | END QRFAC // ------------------------------------------- PrintMatrix("QR", n_, m_, jacobian_); for (dsize in = 0; in != n_; in++) DBGPRINT("\tRdiag[%lu]= %12.6g acnorm[%lu]= %12.6g ipvt[%lu]= %i\n", in+1, Rdiag[in], in+1, JcolNorm_[in], in+1, Jpvt[in]+1); double xnorm = 0.0; if ( currentIt == 0 ) { // First iteration. Scale according to the norms of the columns of // the initial Jacobian. for (dsize in = 0; in != n_; in++) { if ( JcolNorm_[in] == 0.0 ) diag[in] = 1.0; else diag[in] = JcolNorm_[in]; } PrintVector("diag", diag); // Calculate norm of scaled params and init step bound delta for (dsize in = 0; in != n_; in++) work1[in] = diag[in] * Params_[in]; xnorm = VecNorm( work1 ); delta = factor * xnorm; if (delta == 0.0) delta = factor; DBGPRINT("Delta= %g\n", delta); } // Form Qt * residual and store in Qt_r. Only first n components of // Qt*r are needed. Darray Qt_r( n_, 0.0 ); newResidual = residual_; for (dsize in = 0; in != n_; in++) { double matElt = jacobian_[in + in * m_]; DBGPRINT("DEBUG: Element %lu = %g\n", in+1, matElt); if (matElt != 0.0) { double sum = 0.0; for (dsize i = in; i < m_; i++) sum += jacobian_[i + in * m_] * newResidual[i]; double temp = -sum / matElt; for (dsize i = in; i < m_; i++) newResidual[i] += jacobian_[i + in * m_] * temp; } jacobian_[in + in * m_] = Rdiag[in]; DBGPRINT("\tRdiag[%lu]= %g\n", in+1, jacobian_[in + in * m_]); Qt_r[in] = newResidual[in]; DBGPRINT("DEBUG: qtf[%lu]= %g\n", in+1, Qt_r[in]); } // Compute the norm of the scaled gradient. double gnorm = 0.0; if ( rnorm > 0.0 ) { for (dsize in = 0; in != n_; in++) { int l = Jpvt[ in ]; if (JcolNorm_[in] != 0.0) { double sum = 0.0; for (dsize i2 = 0; i2 <= in; i2++) { sum += jacobian_[i2 + in * m_] * (Qt_r[i2] / rnorm); DBGPRINT("DEBUG: jacobian[%lu, %lu]= %g\n", i2+1, in+1, jacobian_[i2 + in * m_]); } // Determine max double d1 = fabs( sum / JcolNorm_[l] ); gnorm = std::max( gnorm, d1 ); } } } DBGPRINT("gnorm= %g\n", gnorm); // Test for convergence of gradient norm. if (gnorm <= gtol) { DBGPRINT("Gradient norm %g is less than gtol %g, iteration %i\n", gnorm, gtol, currentIt+1); info = 4; break; } // Rescale if necessary for (dsize in = 0; in != n_; in++) diag[in] = std::max( diag[in], JcolNorm_[in] ); PrintVector("RescaleDiag", diag); double Ratio = 0.0; while (Ratio < 0.0001) { // ----------------------------------------- // | LMPAR BEGIN // ----------------------------------------- // NOTE: Adapted from lmpar_ in lmdif.c from Grace 5.1.22 DBGPRINT("\nLMPAR ITERATION %i\n", currentIt+1); // Determine the Levenberg-Marquardt parameter. Darray Xvec(n_, 0.0); // Will contain solution to A*x=b, sqrt(par)*D*x=0 // Compute and store in Xvec the Gauss-Newton direction. // If the Jacobian is rank-deficient, obtain a least-squares solution. dsize rank = n_; for (dsize in = 0; in != n_; in++) { work1[in] = Qt_r[in]; DBGPRINT("jac[%lu,%lu]= %12.6g rank= %4lu", in+1, in+1, jacobian_[in + in * m_], rank); if (jacobian_[in + in * m_] == 0.0 && rank == n_) rank = in; if (rank < n_) work1[in] = 0.0; DBGPRINT(" wa1= %12.6g\n", work1[in]); } DBGPRINT("Final rank= %lu\n", rank); // Subtract 1 from rank to use as an index. long int rm1 = rank - 1; if (rm1 >= 0) { for (long int k = 0; k <= rm1; k++) { long int in = rm1 - k; work1[in] /= jacobian_[in + in * (long int)m_]; DBGPRINT("wa1[%li] /= %g\n", in+1, jacobian_[in + in * (long int)m_]); long int in1 = in - 1; if (in1 > -1) { double temp = work1[in]; for (long int i = 0; i <= in1; i++) { work1[i] -= jacobian_[i + in * (long int)m_] * temp; DBGPRINT(" wa1[%li] -= %g\n", i+1, jacobian_[i + in * (long int)m_]); } } } } PrintVector("work1", work1); for (dsize in = 0; in != n_; in++) Xvec[ Jpvt[in] ] = work1[in]; // Evaluate the function at the origin, and test for acceptance of // the Gauss-Newton direction. for (dsize in = 0; in != n_; in++) { DBGPRINT("work2[%lu] = %g * %g\n", in+1, diag[in], Xvec[in]); work2[in] = diag[in] * Xvec[in]; } double dxnorm = VecNorm( work2 ); DBGPRINT("dxnorm= %g\n", dxnorm); double fp = dxnorm - delta; // Initialize counter for searching for LM parameter int lmIterations = 0; if (fp > 0.1 * delta) { // If the Jacobian is not rank deficient, the Newton step provdes a lower // bound, parl, for the zero of the function. Otherwise set this bound to // zero double parl = 0.0; if ( rank >= n_ ) { for (dsize in = 0; in != n_; in++) { int idx = Jpvt[in]; work1[in] = diag[idx] * (work2[idx] / dxnorm); } for (dsize in = 0; in != n_; in++) { double sum = 0.0; long int in1 = (long int)in - 1; if (in1 > -1) { for (long int i = 0; i <= in1; i++) sum += jacobian_[i + in * m_] * work1[i]; } work1[in] = (work1[in] - sum) / jacobian_[in + in * m_]; } double temp = VecNorm(work1); parl = fp / delta / temp / temp; } DBGPRINT("parl= %g\n",parl); // Calculate an upper bound, paru, for the zero of the function for (dsize in = 0; in != n_; in++) { double sum = 0.0; for (dsize i = 0; i <= in; i++) sum += jacobian_[i + in * m_] * Qt_r[i]; work1[in] = sum / diag[ Jpvt[in] ]; DBGPRINT("paru work1[%lu]= %g\n", in+1, work1[in]); } double w1norm = VecNorm( work1 ); double paru = w1norm / delta; DBGPRINT("paru = %g = %g / %g\n", paru, w1norm, delta); if (paru == 0.0) paru = dwarf / std::min( delta, 0.1 ); DBGPRINT("paru= %g\n", paru); // If the current L-M parameter lies outside of the interval (parl,paru), // set par to the closer endpoint. LM_par = std::max( LM_par, parl ); LM_par = std::min( LM_par, paru ); if (LM_par == 0.0) LM_par = w1norm / dxnorm; // Iteration start. bool lmLoop = true; while (lmLoop) { ++lmIterations; DBGPRINT("\t[ lmLoop %i parl= %g paru= %g LM_par= %g ]\n", lmIterations, parl, paru, LM_par); // Evaluate the function at the current value of LM_par if (LM_par == 0.0) LM_par = std::max( dwarf, 0.001 * paru ); double temp = sqrt( LM_par ); for (dsize in = 0; in != n_; in++) { work1[in] = temp * diag[in]; DBGPRINT("\tLMPAR work1[%lu]= %g = %g * %g\n", in+1, work1[in], temp, diag[in]); } // ----------------------------------------------- // | BEGIN QRSOLV // ----------------------------------------------- // NOTE: Adapted from qrsolv_ in lmdif.c from Grace 5.1.22 DBGPRINT("\nQRSOLV ITERATION %i\n", lmIterations); // This array of length n will hold the diagonal elements of the // upper triangular matrix s. Darray Sdiag(n_, 0.0); // Copy R and Qt_r to preserve input and initialize s. for (dsize in = 0; in != n_; in++) { for (dsize i = in; i != n_; i++) jacobian_[i + in * m_] = jacobian_[in + i * m_]; Xvec[in] = jacobian_[in + in * m_]; work2[in] = Qt_r[in]; } // Eliminate the diagonal matrix D using a Givens rotation for (dsize in = 0; in != n_; in++) { // Prepare the row of D to be eliminated, locating the diagonal // element using p from the QR factorization. double diagL = work1[ Jpvt[in] ]; DBGPRINT("diag[%i] = %g\n", Jpvt[in]+1, diagL); if (diagL != 0.0) { for (dsize k = in; k < n_; k++) Sdiag[k] = 0.0; Sdiag[in] = diagL; // The transformations to eliminate the row of D modify only // a single element of Qt_r beyond the first n, which is // initially zero. double qtbpj = 0.0; for (dsize k = in; k < n_; k++) { // Determine a Givens rotation which eliminates the appropriate // element in the current row of D. if (Sdiag[k] != 0.0) { double d1 = jacobian_[k + k * m_]; double cos, sin; if (fabs(d1) < fabs(Sdiag[k])) { double cotan = jacobian_[k + k * m_] / Sdiag[k]; sin = 0.5 / sqrt(0.25 + 0.25 * (cotan * cotan)); cos = sin * cotan; } else { double tan = Sdiag[k] / jacobian_[k + k *m_]; cos = 0.5 / sqrt(0.25 + 0.25 * (tan * tan)); sin = cos * tan; } DBGPRINT("DBG QRsolv: %lu, %lu: cos= %12.6g sin= %12.6g\n", in+1, k+1, cos, sin); // Compute the modified diagonal element of R and the // modified element of (Qt_r, 0) jacobian_[k + k * m_] = cos * jacobian_[k + k * m_] + sin * Sdiag[k]; double temp = cos * work2[k] + sin * qtbpj; qtbpj = -sin * work2[k] + cos * qtbpj; work2[k] = temp; DBGPRINT("\twork2[%lu]= %g\n", k+1, work2[k]); // Accumulate the transformation in the row of S dsize kp1 = k + 1; if (kp1 <= n_) { for (dsize i = kp1; i < n_; i++) { temp = cos * jacobian_[i + k * m_] + sin * Sdiag[i]; Sdiag[i] = -sin * jacobian_[i + k * m_] + cos * Sdiag[i]; jacobian_[i + k * m_] = temp; } } } } } // Store the diagonal element of s and restore the corresponding // diagonal element of r Sdiag[in] = jacobian_[in + in * m_]; DBGPRINT("QRsolv sdiag[%lu]= %g\n", in+1, Sdiag[in]); jacobian_[in + in * m_] = Xvec[in]; } // Solve the triangular system for z. if the system is singular, // then obtain a least-squares solution. dsize u_nsing = n_; for (dsize in = 0; in != n_; in++) { if (Sdiag[in] == 0.0 && u_nsing == n_) u_nsing = in; if (u_nsing < n_) work2[in] = 0.0; } DBGPRINT("nsing= %lu\n", u_nsing); // Subtract 1 from nsing to use as an index. long int nsing = (long int)u_nsing - 1; if (nsing >= 0) { for (long int k = 0; k <= nsing; k++) { long int in = nsing - k; double sum = 0.0; long int in1 = in + 1; if (in1 <= nsing) { for (long int i = in1; i <= nsing; i++) sum += jacobian_[i + in * (long int)m_] * work2[i]; } work2[in] = (work2[in] - sum) / Sdiag[in]; } } // Permute the components of z back to components of x. for (dsize in = 0; in != n_; in++) Xvec[ Jpvt[in] ] = work2[in]; PrintVector("QRsolv Xvec", Xvec); PrintVector("sdiag", Sdiag); // ----------------------------------------------- // | END QRSOLV // ----------------------------------------------- for (dsize in = 0; in != n_; in++) { work2[in] = diag[in] * Xvec[in]; DBGPRINT("\twork2[%lu]= %g = %g * %g\n", in+1, work2[in], diag[in], Xvec[in]); } dxnorm = VecNorm( work2 ); temp = fp; fp = dxnorm - delta; DBGPRINT("DBG LMPAR: fp = %g = %g - %g\n", fp, dxnorm, delta); // If the function is small enough, accept the current value of LM_par. // Also test for the exceptional cases where parl is zero of the number // of iterations has reached 10. if (fabs(fp) <= 0.1 * delta || (parl == 0.0 && fp <= temp && temp < 0.0) || lmIterations == 10) { lmLoop = false; break; } // Compute the Newton correction. for (dsize in = 0; in != n_; in++) { int idx = Jpvt[ in ]; work1[in] = diag[idx] * (work2[idx] / dxnorm); } for (dsize in = 0; in != n_; in++) { work1[in] /= Sdiag[in]; temp = work1[in]; dsize in1 = in + 1; if (in1 <= n_) { for (dsize i = in1; i < n_; i++) work1[i] -= jacobian_[i + in * m_] * temp; } } temp = VecNorm( work1 ); double parc = fp / delta / temp / temp; DBGPRINT("parc= %g\n", parc); // Depending on the sign of the function, update parl or paru if (fp > 0.0) parl = std::max( parl, LM_par ); if (fp < 0.0) paru = std::min( paru, LM_par ); // Compute an improved estimate for the parameter LM_par = std::max( parl, LM_par + parc ); } } DBGPRINT("END LMPAR iter= %i LM_par= %g\n", lmIterations, LM_par); if (lmIterations == 0) LM_par = 0.0; // ------------------------------------------- // | LMPAR END // ------------------------------------------- DBGPRINT("DEBUG: LM_par is %g\n", LM_par); // Store the direction Xvec and Param + Xvec. Calculate norm of Param. PrintVector("DEBUG: hvec", Xvec); for (dsize in = 0; in != n_; in++) { Xvec[in] = -Xvec[in]; work1[in] = Params_[in] + Xvec[in]; work2[in] = diag[in] * Xvec[in]; } double pnorm = VecNorm( work2 ); DBGPRINT("pnorm= %g\n", pnorm); // On first iteration, adjust initial step bound if (currentIt == 0) delta = std::min( delta, pnorm ); DBGPRINT("Delta is now %g\n", delta); // Evaluate function at Param + Xvec and calculate its norm EvaluateFxn( Xvals_, Yvals_, work1, newResidual ); ++nfev; double rnorm1 = VecNorm( newResidual ); DBGPRINT("rnorm1= %g\n", rnorm1); // Compute the scaled actual reduction double actual_reduction = -1.0; if ( 0.1 * rnorm1 < rnorm) { double d1 = rnorm1 / rnorm; actual_reduction = 1.0 - d1 * d1; } DBGPRINT("actualReduction= %g\n", actual_reduction); // Compute the scaled predicted reduction and the scaled directional // derivative. for (dsize in = 0; in != n_; in++) { work2[in] = 0.0; double temp = Xvec[ Jpvt[in] ]; for (dsize i = 0; i <= in; i++) work2[i] += jacobian_[i + in * m_] * temp; } double temp1 = VecNorm( work2 ) / rnorm; double temp2 = sqrt(LM_par) * pnorm / rnorm; DBGPRINT("temp1= %g temp2= %g\n", temp1, temp2); double predicted_reduction = temp1 * temp1 + temp2 * temp2 / 0.5; double dirder = -(temp1 * temp1 + temp2 * temp2); DBGPRINT("predictedReduction = %12.6g dirder= %12.6g\n", predicted_reduction, dirder); // Compute the ratio of the actial to the predicted reduction. if (predicted_reduction != 0.0) Ratio = actual_reduction / predicted_reduction; DBGPRINT("ratio= %g\n", Ratio); // Update the step bound if (Ratio <= 0.25) { DBGPRINT("Ratio <= 0.25\n"); double temp; if (actual_reduction < 0.0) temp = 0.5 * dirder / (dirder + 0.5 * actual_reduction); else temp = 0.5; if ( 0.1 * rnorm1 >= rnorm || temp < 0.1 ) temp = 0.1; DBGPRINT("delta = %g * min( %g, %g )\n", temp, delta, pnorm / 0.1); delta = temp * std::min( delta, pnorm / 0.1 ); LM_par /= temp; } else { if (LM_par == 0.0 || Ratio >= 0.75) { DBGPRINT("Ratio > 0.25 and (LMpar is zero or Ratio >= 0.75\n"); delta = pnorm / 0.5; LM_par *= 0.5; } } DBGPRINT("LM_par= %g Delta= %g\n", LM_par, delta); if (Ratio >= 0.0001) { // Successful iteration. Update Param, residual, and their norms. for (dsize in = 0; in != n_; in++) { Params_[in] = work1[in]; work1[in] = diag[in] * Params_[in]; } for (dsize im = 0; im < m_; im++) residual_[im] = newResidual[im]; xnorm = VecNorm( work1 ); rnorm = rnorm1; } // Tests for convergence if (fabs(actual_reduction) <= tolerance && predicted_reduction <= tolerance && 0.5 * Ratio <= 1.0) info = 1; if (delta <= xtol * xnorm) info = 2; if (fabs(actual_reduction) <= tolerance && predicted_reduction <= tolerance && 0.5 * Ratio <= 1.0 && info == 2) info = 3; if (info != 0) break; // Tests for stringent tolerance if (nfev >= maxfev) info = 5; if (fabs(actual_reduction) <= machine_epsilon && predicted_reduction <= machine_epsilon && 0.5 * Ratio <= 1.0) info = 6; if (delta <= machine_epsilon * xnorm) info = 7; if (gnorm <= machine_epsilon) info = 8; if (info != 0) break; } // END inner loop if (info != 0) break; currentIt++; } // Final parameters and Y at final parameters Params_to_Pvec(ParamVec, Params_); fxn_(Xvals_, ParamVec, finalY_); # ifdef DBG_CURVEFIT DBGPRINT("%s\n", Message(info)); DBGPRINT("Exiting with info value = %i\n", info); for (dsize in = 0; in != n_; in++) DBGPRINT("\tParams[%lu]= %g\n", in, ParamVec[in]); # endif return info; }