Example #1
0
void work_()
{
    long t;

    asm("nop");
    pmem_in[2] = 'A';
    t = *(long*)0x200000018;
    output_char('t');
    output_q_hex(t);
    output_char(C_n);
    output_char(*(char*)0x0000088000000002);
    output_q_hex(*(char*)0x0000088000000002);
    output_char(C_n);
    asm("nop");
    asm("nop");
    *(pmem_in+2) = 'A';
    output_char('u');
    output_char(*(char*)0x0000088000000002);
    output_q_hex(*(char*)0x0000088000000002);
    output_char(C_n);
    work0();
    work1();
    work2();
    return;
}
void good_nesting2(int n)
{
  int i;
  #pragma omp parallel default(shared)
  {
    #pragma omp for
    for (i=0; i<n; i++)
      work1(i, n);
  }
}
Example #3
0
File: a.40.1.c Project: 0day-ci/gcc
void
a40 (pair * p)
{
#pragma omp parallel sections
  {
#pragma omp section
    incr_pair (p, work1 (), work2 ());
#pragma omp section
    incr_b (p, work3 ());
  }
}
void foo(int q) {
  int p = 2;

  #pragma omp parallel firstprivate(q, p)
  work1(p, q);
// IPCP: call void @work1(i32 2, i32 %{{[._a-zA-Z0-9]*}})

  #pragma omp parallel for firstprivate(p, q)
  for (int i = 0; i < q; i++)
    work2(i, p);
// IPCP: call void @work2(i32 %{{[._a-zA-Z0-9]*}}, i32 2)

  #pragma omp target teams firstprivate(p)
  work12(p, p);
// IPCP: call void @work12(i32 2, i32 2)
}
Example #5
0
int main(int argc, const char * argv[])
{

    //demo1();
    
    //demo2();
    
    //demo3();
    
    work1();
    
    
    
    
    
    return 0;
}
Example #6
0
TEST(fit_ellipse, test1)
{
    int num_points = 7, status = 0;
    double maj_d = 0.0, min_d = 0.0, pa_d = 0.0;
    float maj_f = 0.0, min_f = 0.0, pa_f = 0.0;

    // Test double precision.
    {
        std::vector<double> x(num_points), y(num_points);
        std::vector<double> work1(5 * num_points), work2(5 * num_points);
        x[0] = -0.1686;
        x[1] = -0.0921;
        x[2] =  0.0765;
        x[3] =  0.1686;
        x[4] =  0.0921;
        x[5] = -0.0765;
        x[6] = -0.1686;

        y[0] =  0.7282;
        y[1] =  0.6994;
        y[2] =  0.6675;
        y[3] =  0.6643;
        y[4] =  0.7088;
        y[5] =  0.7407;
        y[6] =  0.7282;

        oskar_fit_ellipse_d(&maj_d, &min_d, &pa_d, num_points, &x[0], &y[0],
                &work1[0], &work2[0], &status);
        EXPECT_EQ(0, status) << oskar_get_error_string(status);
    }

    // Test single precision.
    {
        std::vector<float> x(num_points), y(num_points);
        std::vector<float> work1(5 * num_points), work2(5 * num_points);
        x[0] = -0.1686;
        x[1] = -0.0921;
        x[2] =  0.0765;
        x[3] =  0.1686;
        x[4] =  0.0921;
        x[5] = -0.0765;
        x[6] = -0.1686;

        y[0] =  0.7282;
        y[1] =  0.6994;
        y[2] =  0.6675;
        y[3] =  0.6643;
        y[4] =  0.7088;
        y[5] =  0.7407;
        y[6] =  0.7282;

        oskar_fit_ellipse_f(&maj_f, &min_f, &pa_f, num_points, &x[0], &y[0],
                &work1[0], &work2[0], &status);
        EXPECT_EQ(0, status) << oskar_get_error_string(status);
    }

    // Compare results.
    EXPECT_NEAR(maj_d, 0.3608619735, 1e-9);
    EXPECT_NEAR(min_d, 0.0494223702, 1e-9);
    EXPECT_NEAR(pa_d, -1.3865537748, 1e-9);
    EXPECT_NEAR(maj_d, maj_f, 1e-5);
    EXPECT_NEAR(min_d, min_f, 1e-5);
    EXPECT_NEAR(pa_d, pa_f, 1e-5);
}
Example #7
0
File: bdsvd.cpp Project: 0004c/VTK
/*************************************************************************
Internal working subroutine for bidiagonal decomposition
*************************************************************************/
bool bidiagonalsvddecompositioninternal(ap::real_1d_array& d,
     ap::real_1d_array e,
     int n,
     bool isupper,
     bool isfractionalaccuracyrequired,
     ap::real_2d_array& u,
     int ustart,
     int nru,
     ap::real_2d_array& c,
     int cstart,
     int ncc,
     ap::real_2d_array& vt,
     int vstart,
     int ncvt)
{
    bool result;
    int i;
    int idir;
    int isub;
    int iter;
    int j;
    int ll = 0; // Eliminate compiler warning.
    int lll;
    int m;
    int maxit;
    int oldll;
    int oldm;
    double abse;
    double abss;
    double cosl;
    double cosr;
    double cs;
    double eps;
    double f;
    double g;
    double h;
    double mu;
    double oldcs;
    double oldsn = 0.; // Eliminate compiler warning.
    double r;
    double shift;
    double sigmn;
    double sigmx;
    double sinl;
    double sinr;
    double sll;
    double smax;
    double smin;
    double sminl;
    double sminoa;
    double sn;
    double thresh;
    double tol;
    double tolmul;
    double unfl;
    ap::real_1d_array work0;
    ap::real_1d_array work1;
    ap::real_1d_array work2;
    ap::real_1d_array work3;
    int maxitr;
    bool matrixsplitflag;
    bool iterflag;
    ap::real_1d_array utemp;
    ap::real_1d_array vttemp;
    ap::real_1d_array ctemp;
    ap::real_1d_array etemp;
    bool fwddir;
    double tmp;
    int mm1;
    int mm0;
    bool bchangedir;
    int uend;
    int cend;
    int vend;

    result = true;
    if( n==0 )
    {
        return result;
    }
    if( n==1 )
    {
        if( d(1)<0 )
        {
            d(1) = -d(1);
            if( ncvt>0 )
            {
                ap::vmul(&vt(vstart, vstart), ap::vlen(vstart,vstart+ncvt-1), -1);
            }
        }
        return result;
    }
    
    //
    // init
    //
    work0.setbounds(1, n-1);
    work1.setbounds(1, n-1);
    work2.setbounds(1, n-1);
    work3.setbounds(1, n-1);
    uend = ustart+ap::maxint(nru-1, 0);
    vend = vstart+ap::maxint(ncvt-1, 0);
    cend = cstart+ap::maxint(ncc-1, 0);
    utemp.setbounds(ustart, uend);
    vttemp.setbounds(vstart, vend);
    ctemp.setbounds(cstart, cend);
    maxitr = 12;
    fwddir = true;
    
    //
    // resize E from N-1 to N
    //
    etemp.setbounds(1, n);
    for(i = 1; i <= n-1; i++)
    {
        etemp(i) = e(i);
    }
    e.setbounds(1, n);
    for(i = 1; i <= n-1; i++)
    {
        e(i) = etemp(i);
    }
    e(n) = 0;
    idir = 0;
    
    //
    // Get machine constants
    //
    eps = ap::machineepsilon;
    unfl = ap::minrealnumber;
    
    //
    // If matrix lower bidiagonal, rotate to be upper bidiagonal
    // by applying Givens rotations on the left
    //
    if( !isupper )
    {
        for(i = 1; i <= n-1; i++)
        {
            generaterotation(d(i), e(i), cs, sn, r);
            d(i) = r;
            e(i) = sn*d(i+1);
            d(i+1) = cs*d(i+1);
            work0(i) = cs;
            work1(i) = sn;
        }
        
        //
        // Update singular vectors if desired
        //
        if( nru>0 )
        {
            applyrotationsfromtheright(fwddir, ustart, uend, 1+ustart-1, n+ustart-1, work0, work1, u, utemp);
        }
        if( ncc>0 )
        {
            applyrotationsfromtheleft(fwddir, 1+cstart-1, n+cstart-1, cstart, cend, work0, work1, c, ctemp);
        }
    }
    
    //
    // Compute singular values to relative accuracy TOL
    // (By setting TOL to be negative, algorithm will compute
    // singular values to absolute accuracy ABS(TOL)*norm(input matrix))
    //
    tolmul = ap::maxreal(double(10), ap::minreal(double(100), pow(eps, -0.125)));
    tol = tolmul*eps;
    if( !isfractionalaccuracyrequired )
    {
        tol = -tol;
    }
    
    //
    // Compute approximate maximum, minimum singular values
    //
    smax = 0;
    for(i = 1; i <= n; i++)
    {
        smax = ap::maxreal(smax, fabs(d(i)));
    }
    for(i = 1; i <= n-1; i++)
    {
        smax = ap::maxreal(smax, fabs(e(i)));
    }
    sminl = 0;
    if( tol>=0 )
    {
        
        //
        // Relative accuracy desired
        //
        sminoa = fabs(d(1));
        if( sminoa!=0 )
        {
            mu = sminoa;
            for(i = 2; i <= n; i++)
            {
                mu = fabs(d(i))*(mu/(mu+fabs(e(i-1))));
                sminoa = ap::minreal(sminoa, mu);
                if( sminoa==0 )
                {
                    break;
                }
            }
        }
        sminoa = sminoa/sqrt(double(n));
        thresh = ap::maxreal(tol*sminoa, maxitr*n*n*unfl);
    }
    else
    {
        
        //
        // Absolute accuracy desired
        //
        thresh = ap::maxreal(fabs(tol)*smax, maxitr*n*n*unfl);
    }
    
    //
    // Prepare for main iteration loop for the singular values
    // (MAXIT is the maximum number of passes through the inner
    // loop permitted before nonconvergence signalled.)
    //
    maxit = maxitr*n*n;
    iter = 0;
    oldll = -1;
    oldm = -1;
    
    //
    // M points to last element of unconverged part of matrix
    //
    m = n;
    
    //
    // Begin main iteration loop
    //
    while(true)
    {
        
        //
        // Check for convergence or exceeding iteration count
        //
        if( m<=1 )
        {
            break;
        }
        if( iter>maxit )
        {
            result = false;
            return result;
        }
        
        //
        // Find diagonal block of matrix to work on
        //
        if( tol<0&&fabs(d(m))<=thresh )
        {
            d(m) = 0;
        }
        smax = fabs(d(m));
        smin = smax;
        matrixsplitflag = false;
        for(lll = 1; lll <= m-1; lll++)
        {
            ll = m-lll;
            abss = fabs(d(ll));
            abse = fabs(e(ll));
            if( tol<0&&abss<=thresh )
            {
                d(ll) = 0;
            }
            if( abse<=thresh )
            {
                matrixsplitflag = true;
                break;
            }
            smin = ap::minreal(smin, abss);
            smax = ap::maxreal(smax, ap::maxreal(abss, abse));
        }
        if( !matrixsplitflag )
        {
            ll = 0;
        }
        else
        {
            
            //
            // Matrix splits since E(LL) = 0
            //
            e(ll) = 0;
            if( ll==m-1 )
            {
                
                //
                // Convergence of bottom singular value, return to top of loop
                //
                m = m-1;
                continue;
            }
        }
        ll = ll+1;
        
        //
        // E(LL) through E(M-1) are nonzero, E(LL-1) is zero
        //
        if( ll==m-1 )
        {
            
            //
            // 2 by 2 block, handle separately
            //
            svdv2x2(d(m-1), e(m-1), d(m), sigmn, sigmx, sinr, cosr, sinl, cosl);
            d(m-1) = sigmx;
            e(m-1) = 0;
            d(m) = sigmn;
            
            //
            // Compute singular vectors, if desired
            //
            if( ncvt>0 )
            {
                mm0 = m+(vstart-1);
                mm1 = m-1+(vstart-1);
                ap::vmove(&vttemp(vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), cosr);
                ap::vadd(&vttemp(vstart), &vt(mm0, vstart), ap::vlen(vstart,vend), sinr);
                ap::vmul(&vt(mm0, vstart), ap::vlen(vstart,vend), cosr);
                ap::vsub(&vt(mm0, vstart), &vt(mm1, vstart), ap::vlen(vstart,vend), sinr);
                ap::vmove(&vt(mm1, vstart), &vttemp(vstart), ap::vlen(vstart,vend));
            }
            if( nru>0 )
            {
                mm0 = m+ustart-1;
                mm1 = m-1+ustart-1;
                ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(mm1, ustart, uend), cosl);
                ap::vadd(utemp.getvector(ustart, uend), u.getcolumn(mm0, ustart, uend), sinl);
                ap::vmul(u.getcolumn(mm0, ustart, uend), cosl);
                ap::vsub(u.getcolumn(mm0, ustart, uend), u.getcolumn(mm1, ustart, uend), sinl);
                ap::vmove(u.getcolumn(mm1, ustart, uend), utemp.getvector(ustart, uend));
            }
            if( ncc>0 )
            {
                mm0 = m+cstart-1;
                mm1 = m-1+cstart-1;
                ap::vmove(&ctemp(cstart), &c(mm1, cstart), ap::vlen(cstart,cend), cosl);
                ap::vadd(&ctemp(cstart), &c(mm0, cstart), ap::vlen(cstart,cend), sinl);
                ap::vmul(&c(mm0, cstart), ap::vlen(cstart,cend), cosl);
                ap::vsub(&c(mm0, cstart), &c(mm1, cstart), ap::vlen(cstart,cend), sinl);
                ap::vmove(&c(mm1, cstart), &ctemp(cstart), ap::vlen(cstart,cend));
            }
            m = m-2;
            continue;
        }
        
        //
        // If working on new submatrix, choose shift direction
        // (from larger end diagonal element towards smaller)
        //
        // Previously was
        //     "if (LL>OLDM) or (M<OLDLL) then"
        // fixed thanks to Michael Rolle < *****@*****.** >
        // Very strange that LAPACK still contains it.
        //
        bchangedir = false;
        if( idir==1&&fabs(d(ll))<1.0E-3*fabs(d(m)) )
        {
            bchangedir = true;
        }
        if( idir==2&&fabs(d(m))<1.0E-3*fabs(d(ll)) )
        {
            bchangedir = true;
        }
        if( ll!=oldll||m!=oldm||bchangedir )
        {
            if( fabs(d(ll))>=fabs(d(m)) )
            {
                
                //
                // Chase bulge from top (big end) to bottom (small end)
                //
                idir = 1;
            }
            else
            {
                
                //
                // Chase bulge from bottom (big end) to top (small end)
                //
                idir = 2;
            }
        }
        
        //
        // Apply convergence tests
        //
        if( idir==1 )
        {
            
            //
            // Run convergence test in forward direction
            // First apply standard test to bottom of matrix
            //
            if( (fabs(e(m-1))<=fabs(tol)*fabs(d(m)))||(tol<0&&fabs(e(m-1))<=thresh) )
            {
                e(m-1) = 0;
                continue;
            }
            if( tol>=0 )
            {
                
                //
                // If relative accuracy desired,
                // apply convergence criterion forward
                //
                mu = fabs(d(ll));
                sminl = mu;
                iterflag = false;
                for(lll = ll; lll <= m-1; lll++)
                {
                    if( fabs(e(lll))<=tol*mu )
                    {
                        e(lll) = 0;
                        iterflag = true;
                        break;
                    }
                    mu = fabs(d(lll+1))*(mu/(mu+fabs(e(lll))));
                    sminl = ap::minreal(sminl, mu);
                }
                if( iterflag )
                {
                    continue;
                }
            }
        }
        else
        {
            
            //
            // Run convergence test in backward direction
            // First apply standard test to top of matrix
            //
            if( (fabs(e(ll))<=fabs(tol)*fabs(d(ll)))||(tol<0&&fabs(e(ll))<=thresh) )
            {
                e(ll) = 0;
                continue;
            }
            if( tol>=0 )
            {
                
                //
                // If relative accuracy desired,
                // apply convergence criterion backward
                //
                mu = fabs(d(m));
                sminl = mu;
                iterflag = false;
                for(lll = m-1; lll >= ll; lll--)
                {
                    if( fabs(e(lll))<=tol*mu )
                    {
                        e(lll) = 0;
                        iterflag = true;
                        break;
                    }
                    mu = fabs(d(lll))*(mu/(mu+fabs(e(lll))));
                    sminl = ap::minreal(sminl, mu);
                }
                if( iterflag )
                {
                    continue;
                }
            }
        }
        oldll = ll;
        oldm = m;
        
        //
        // Compute shift.  First, test if shifting would ruin relative
        // accuracy, and if so set the shift to zero.
        //
        if( tol>=0&&n*tol*(sminl/smax)<=ap::maxreal(eps, 0.01*tol) )
        {
            
            //
            // Use a zero shift to avoid loss of relative accuracy
            //
            shift = 0;
        }
        else
        {
            
            //
            // Compute the shift from 2-by-2 block at end of matrix
            //
            if( idir==1 )
            {
                sll = fabs(d(ll));
                svd2x2(d(m-1), e(m-1), d(m), shift, r);
            }
            else
            {
                sll = fabs(d(m));
                svd2x2(d(ll), e(ll), d(ll+1), shift, r);
            }
            
            //
            // Test if shift negligible, and if so set to zero
            //
            if( sll>0 )
            {
                if( ap::sqr(shift/sll)<eps )
                {
                    shift = 0;
                }
            }
        }
        
        //
        // Increment iteration count
        //
        iter = iter+m-ll;
        
        //
        // If SHIFT = 0, do simplified QR iteration
        //
        if( shift==0 )
        {
            if( idir==1 )
            {
                
                //
                // Chase bulge from top to bottom
                // Save cosines and sines for later singular vector updates
                //
                cs = 1;
                oldcs = 1;
                for(i = ll; i <= m-1; i++)
                {
                    generaterotation(d(i)*cs, e(i), cs, sn, r);
                    if( i>ll )
                    {
                        e(i-1) = oldsn*r;
                    }
                    generaterotation(oldcs*r, d(i+1)*sn, oldcs, oldsn, tmp);
                    d(i) = tmp;
                    work0(i-ll+1) = cs;
                    work1(i-ll+1) = sn;
                    work2(i-ll+1) = oldcs;
                    work3(i-ll+1) = oldsn;
                }
                h = d(m)*cs;
                d(m) = h*oldcs;
                e(m-1) = h*oldsn;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(m-1))<=thresh )
                {
                    e(m-1) = 0;
                }
            }
            else
            {
                
                //
                // Chase bulge from bottom to top
                // Save cosines and sines for later singular vector updates
                //
                cs = 1;
                oldcs = 1;
                for(i = m; i >= ll+1; i--)
                {
                    generaterotation(d(i)*cs, e(i-1), cs, sn, r);
                    if( i<m )
                    {
                        e(i) = oldsn*r;
                    }
                    generaterotation(oldcs*r, d(i-1)*sn, oldcs, oldsn, tmp);
                    d(i) = tmp;
                    work0(i-ll) = cs;
                    work1(i-ll) = -sn;
                    work2(i-ll) = oldcs;
                    work3(i-ll) = -oldsn;
                }
                h = d(ll)*cs;
                d(ll) = h*oldcs;
                e(ll) = h*oldsn;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(ll))<=thresh )
                {
                    e(ll) = 0;
                }
            }
        }
        else
        {
            
            //
            // Use nonzero shift
            //
            if( idir==1 )
            {
                
                //
                // Chase bulge from top to bottom
                // Save cosines and sines for later singular vector updates
                //
                f = (fabs(d(ll))-shift)*(extsignbdsqr(double(1), d(ll))+shift/d(ll));
                g = e(ll);
                for(i = ll; i <= m-1; i++)
                {
                    generaterotation(f, g, cosr, sinr, r);
                    if( i>ll )
                    {
                        e(i-1) = r;
                    }
                    f = cosr*d(i)+sinr*e(i);
                    e(i) = cosr*e(i)-sinr*d(i);
                    g = sinr*d(i+1);
                    d(i+1) = cosr*d(i+1);
                    generaterotation(f, g, cosl, sinl, r);
                    d(i) = r;
                    f = cosl*e(i)+sinl*d(i+1);
                    d(i+1) = cosl*d(i+1)-sinl*e(i);
                    if( i<m-1 )
                    {
                        g = sinl*e(i+1);
                        e(i+1) = cosl*e(i+1);
                    }
                    work0(i-ll+1) = cosr;
                    work1(i-ll+1) = sinr;
                    work2(i-ll+1) = cosl;
                    work3(i-ll+1) = sinl;
                }
                e(m-1) = f;
                
                //
                // Update singular vectors
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work0, work1, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work2, work3, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work2, work3, c, ctemp);
                }
                
                //
                // Test convergence
                //
                if( fabs(e(m-1))<=thresh )
                {
                    e(m-1) = 0;
                }
            }
            else
            {
                
                //
                // Chase bulge from bottom to top
                // Save cosines and sines for later singular vector updates
                //
                f = (fabs(d(m))-shift)*(extsignbdsqr(double(1), d(m))+shift/d(m));
                g = e(m-1);
                for(i = m; i >= ll+1; i--)
                {
                    generaterotation(f, g, cosr, sinr, r);
                    if( i<m )
                    {
                        e(i) = r;
                    }
                    f = cosr*d(i)+sinr*e(i-1);
                    e(i-1) = cosr*e(i-1)-sinr*d(i);
                    g = sinr*d(i-1);
                    d(i-1) = cosr*d(i-1);
                    generaterotation(f, g, cosl, sinl, r);
                    d(i) = r;
                    f = cosl*e(i-1)+sinl*d(i-1);
                    d(i-1) = cosl*d(i-1)-sinl*e(i-1);
                    if( i>ll+1 )
                    {
                        g = sinl*e(i-2);
                        e(i-2) = cosl*e(i-2);
                    }
                    work0(i-ll) = cosr;
                    work1(i-ll) = -sinr;
                    work2(i-ll) = cosl;
                    work3(i-ll) = -sinl;
                }
                e(ll) = f;
                
                //
                // Test convergence
                //
                if( fabs(e(ll))<=thresh )
                {
                    e(ll) = 0;
                }
                
                //
                // Update singular vectors if desired
                //
                if( ncvt>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+vstart-1, m+vstart-1, vstart, vend, work2, work3, vt, vttemp);
                }
                if( nru>0 )
                {
                    applyrotationsfromtheright(!fwddir, ustart, uend, ll+ustart-1, m+ustart-1, work0, work1, u, utemp);
                }
                if( ncc>0 )
                {
                    applyrotationsfromtheleft(!fwddir, ll+cstart-1, m+cstart-1, cstart, cend, work0, work1, c, ctemp);
                }
            }
        }
        
        //
        // QR iteration finished, go back and check convergence
        //
        continue;
    }
    
    //
    // All singular values converged, so make them positive
    //
    for(i = 1; i <= n; i++)
    {
        if( d(i)<0 )
        {
            d(i) = -d(i);
            
            //
            // Change sign of singular vectors, if desired
            //
            if( ncvt>0 )
            {
                ap::vmul(&vt(i+vstart-1, vstart), ap::vlen(vstart,vend), -1);
            }
        }
    }
    
    //
    // Sort the singular values into decreasing order (insertion sort on
    // singular values, but only one transposition per singular vector)
    //
    for(i = 1; i <= n-1; i++)
    {
        
        //
        // Scan for smallest D(I)
        //
        isub = 1;
        smin = d(1);
        for(j = 2; j <= n+1-i; j++)
        {
            if( d(j)<=smin )
            {
                isub = j;
                smin = d(j);
            }
        }
        if( isub!=n+1-i )
        {
            
            //
            // Swap singular values and vectors
            //
            d(isub) = d(n+1-i);
            d(n+1-i) = smin;
            if( ncvt>0 )
            {
                j = n+1-i;
                ap::vmove(&vttemp(vstart), &vt(isub+vstart-1, vstart), ap::vlen(vstart,vend));
                ap::vmove(&vt(isub+vstart-1, vstart), &vt(j+vstart-1, vstart), ap::vlen(vstart,vend));
                ap::vmove(&vt(j+vstart-1, vstart), &vttemp(vstart), ap::vlen(vstart,vend));
            }
            if( nru>0 )
            {
                j = n+1-i;
                ap::vmove(utemp.getvector(ustart, uend), u.getcolumn(isub+ustart-1, ustart, uend));
                ap::vmove(u.getcolumn(isub+ustart-1, ustart, uend), u.getcolumn(j+ustart-1, ustart, uend));
                ap::vmove(u.getcolumn(j+ustart-1, ustart, uend), utemp.getvector(ustart, uend));
            }
            if( ncc>0 )
            {
                j = n+1-i;
                ap::vmove(&ctemp(cstart), &c(isub+cstart-1, cstart), ap::vlen(cstart,cend));
                ap::vmove(&c(isub+cstart-1, cstart), &c(j+cstart-1, cstart), ap::vlen(cstart,cend));
                ap::vmove(&c(j+cstart-1, cstart), &ctemp(cstart), ap::vlen(cstart,cend));
            }
        }
    }
    return result;
}
Example #8
0
/** Perform curve fitting via Levenberg-Marquardt method with optional bounds.
  * \param fxnIn Function to fit.
  * \param Xvals_ target X values (ordinates, M)
  * \param Yvals_ target Y values (coordinates, M)
  * \param ParamVec input parameters(N); at finish contains best esimate of fit parameters
  * \param boundsIn true if parameter has bounds (N)
  * \param lboundIn contain lower bounds for parameter (N)
  * \param uboundIn contain upper bounds for parameter (N)
  * \param weightsIn contain weights for Y values (M)
  * \param tolerance Fit tolerance
  * \param maxIterations Number of iterations to try.
  */
int CurveFit::LevenbergMarquardt(FitFunctionType fxnIn, Darray const& Xvals_,
                                 Darray const& Yvals_, Darray& ParamVec,
                                 std::vector<bool> const& boundsIn,
                                 Darray const& lboundIn, Darray const& uboundIn,
                                 Darray const& weightsIn,
                                 double tolerance, int maxIterations)
{
  int info = 0;
  hasBounds_ = boundsIn;
  Ubound_ = uboundIn;
  Lbound_ = lboundIn;
  Weights_ = weightsIn;
  if (ParametersHaveProblems(Xvals_, Yvals_, ParamVec)) {
    DBGPRINT("Error: %s\n", errorMessage_);
    return info;
  }
  // Set initial parameters
  finalY_ = Yvals_;
  Params_= ParamVec;  // Internal parameter vector
  fParms_ = ParamVec; // Parameters for function evaluation
  Pvec_to_Params( ParamVec );

  fxn_ = fxnIn;
  m_ = Xvals_.size();        // Number of values (rows)
  n_ = Params_.size();       // Number of parameters (cols)
  Darray residual_( m_ );    // Contain Y vals at current Params minus original Y
  // Jacobian matrix/R: m rows by n cols, transposed.
  jacobian_.assign( m_ * n_, 0.0 );

  // For holding diagonal elements for scaling // TODO: Rename
  Darray diag( n_, 0.0 );
  // Workspace arrays
  Darray work1( n_, 0.0 );
  Darray work2( n_, 0.0 );
  Darray newResidual( m_, 0.0 );
  // delta specifies an upper bound on the euclidean norm of d*x
  double delta = 0.0;
  // factor is positive input variable used in determining the initial step
  // bound. This bound is set to the product of factor and the euclidean norm
  // of diag*x if nonzero, or else to factor itself. In most cases factor should
  // lie in the interval (.1,100.). 100. is a generally recommended value.
  // TODO: Make input parameter
  const double factor = 100.0;
  // gtol is a nonnegative input variable. Termination occurs when the cosine 
  // of the angle between residual and any column of the Jacobian is at most 
  // gtol in absolute value. Therefore, gtol measures the orthogonality
  // desired between the function vector and the columns of the Jacobian.
  // TODO: Make input parameter
  const double gtol = 0.0;
  // Smallest possible magnitude
  // TODO: Make Constant?
  const double dwarf = DBL_MIN;
  // Levenberg-Marquard parameter
  double LM_par = 0.0;
  //  xtol is a nonnegative input variable. Termination occurs when the 
  //  relative error between two consecutive iterates is at most xtol.
  //  Therefore, xtol measures the relative error desired in the 
  //  approximate solution.
  // TODO: Make input paramter
  double xtol = 0.0;
  // maxfev is the maxiumum number of allowed function evaluations.
  // NOTE: Included here for complete compatibility with eariler code,
  //       though may not be strictly necessary.
  dsize maxfev = (n_ + 1) * (dsize)maxIterations;

  // Evaluate initial function, obtain residual
  EvaluateFxn( Xvals_, Yvals_, Params_, residual_ );

  // Calculate norm of the residual
  double rnorm = VecNorm( residual_ );
  DBGPRINT("Rnorm= %g\n", rnorm);

  dsize nfev = 1; // Will be set to calls to fxn_. 1 already.

  // MAIN LOOP
  int currentIt = 0;
  while ( currentIt < maxIterations ) {
    DBGPRINT("DEBUG: ----- Iteration %i ------------------------------\n", currentIt+1);
    // Calculate the Jacobian using the forward-difference approximation.
    CalcJacobian_ForwardDiff( Xvals_, Yvals_, Params_, residual_, newResidual );
    PrintMatrix( "Jacobian", n_, m_, jacobian_ );
    nfev += n_;

    // -------------------------------------------
    // | BEGIN QRFAC
    // -------------------------------------------
    // Perform QR factorization of Jacobian via Householder transformations
    // with column pivoting:
    //   J * P = Q * R
    // where J is the Jacobian, P is the permutation matrix, Q is an orthogonal
    // matrix, and R is an upper trapezoidal matrix with diagonal elements of
    // nonincreasing magnitude. The Householder transformation for column k,
    // k = 1,2,...,min(m,n), is of the form:
    //   I = (1/u[k])*u*ut
    // where u has zeros in the first k-1 positions. Adapted from routine qrfac_
    // in Grace 5.1.22 lmdif.c, which in turn is derived from an earlier linpack
    // subroutine.
    DBGPRINT("\nQRFAC ITERATION %i\n", currentIt+1);
    // Rdiag will hold the diagonal elements of R.
    Darray Rdiag(n_, 0.0);
    // Jpvt defines the permutation matrix p such that a*p = q*r. Column j of p
    // is column Jpvt[j] of the identity matrix.
    Iarray Jpvt(n_, 0.0);
    // JcolNorm Will hold the norms of the columns of input Jacobian.
    Darray JcolNorm_(n_, 0.0);
    // Compute the initial column norms and initialize arrays.
    for (dsize in = 0; in != n_; in++) {
      JcolNorm_[in] = VecNorm( jacobian_.begin() + (in * m_), m_ ); 
      Rdiag[in] = JcolNorm_[in];
      work1[in] = Rdiag[in];
      Jpvt[in] = in;
      DBGPRINT("%lu: Rdiag= %12.6g    wa= %12.6g    ipvt= %i\n",
             in+1, Rdiag[in], work1[in], Jpvt[in]+1);
    }

    // Reduce Jacobian to R with Householder transformations.
    dsize min_m_n = std::min( m_, n_ );
    for (dsize in = 0; in != min_m_n; in++) {
      // Bring the column of largest norm into the pivot position.
      dsize kmax = in;
      for (dsize k = in; k != n_; k++) {
        DBGPRINT("\tRdiag[%lu]= %g    Rdiag[%lu]= %g\n", k+1, Rdiag[k], kmax+1, Rdiag[kmax]);
        if (Rdiag[k] > Rdiag[kmax])
          kmax = k;
      }
      DBGPRINT("Elt= %lu    Kmax= %lu\n", in+1, kmax+1);
      if (kmax != in) {
        for (dsize i = 0; i != m_; i++) {
          double temp = jacobian_[i + in * m_];
          jacobian_[i + in   * m_] = jacobian_[i + kmax * m_];
          jacobian_[i + kmax * m_] = temp;
          DBGPRINT("DBG: Swap jac[%lu,%lu] with jac[%lu,%lu]\n", i+1, in+1, i+1, kmax+1);
        }
        Rdiag[kmax] = Rdiag[in];
        work1[kmax] = work1[in];
        dsize k = Jpvt[in];
        Jpvt[in] = Jpvt[kmax];
        Jpvt[kmax] = k;
      }

      // Compute the Householder transformation to reduce the j-th column
      // of Jacobian to a multiple of the j-th unit vector.
      double ajnorm = VecNorm( jacobian_.begin() + (in + in * m_), m_ - in );
      DBGPRINT("#Elt= %lu    mat[%lu]= %g    ajnorm= %g\n",
             m_ - in, in + in * m_, jacobian_[in + in * m_], ajnorm);
      if (ajnorm != 0.0) {
        if (jacobian_[in + in * m_] < 0.0)
          ajnorm = -ajnorm;
        for (dsize im = in; im < m_; im++)
          jacobian_[im + in * m_] /= ajnorm;
        jacobian_[in + in * m_] += 1.0;

        // Apply the transfomation to the remaining columns and update the norms
        dsize in1 = in + 1;
        if (in1 < n_) {
          for (dsize k = in1; k < n_; k++) {
            double sum = 0.0;
            for (dsize i = in; i < m_; i++)
              sum += jacobian_[i + in * m_] * jacobian_[i + k * m_];
            double temp = sum / jacobian_[in + in * m_];
            for (dsize i = in; i < m_; i++)
              jacobian_[i + k * m_] -= temp * jacobian_[i + in * m_];
            if (Rdiag[k] != 0.0) {
              temp = jacobian_[in + k * m_] / Rdiag[k];
              Rdiag[k] *= sqrt( std::max( 0.0, 1.0 - temp * temp ) );
              temp = Rdiag[k] / work1[k];
              DBGPRINT("\t\tQRFAC TEST: 0.5 * %g^2 <= %g\n", temp, machine_epsilon);
              if (0.05 * (temp * temp) <= machine_epsilon) {
                DBGPRINT("\t\tTEST PASSED\n");
                Rdiag[k] = VecNorm( jacobian_.begin() + (in1 + k * m_), m_ - in - 1 );
                work1[k] = Rdiag[k];
              }
            }
            DBGPRINT("QRFAC Rdiag[%lu]= %g\n", k+1, Rdiag[k]);
          }
        }
      }
      Rdiag[in] = -ajnorm;
    }      
    // -------------------------------------------
    // | END QRFAC
    // -------------------------------------------

    PrintMatrix("QR", n_, m_, jacobian_);
    for (dsize in = 0; in != n_; in++)
      DBGPRINT("\tRdiag[%lu]= %12.6g    acnorm[%lu]= %12.6g    ipvt[%lu]= %i\n",
             in+1, Rdiag[in], in+1, JcolNorm_[in], in+1, Jpvt[in]+1);

    double xnorm = 0.0;
    if ( currentIt == 0 ) {
      // First iteration. Scale according to the norms of the columns of
      // the initial Jacobian.
      for (dsize in = 0; in != n_; in++) {
        if ( JcolNorm_[in] == 0.0 )
          diag[in] = 1.0;
        else
          diag[in] = JcolNorm_[in];
      }
      PrintVector("diag", diag);
      // Calculate norm of scaled params and init step bound delta
      for (dsize in = 0; in != n_; in++)
        work1[in] = diag[in] * Params_[in];
      xnorm = VecNorm( work1 );
      delta = factor * xnorm;
      if (delta == 0.0)
        delta = factor;
      DBGPRINT("Delta= %g\n", delta);
    }

    // Form Qt * residual and store in Qt_r. Only first n components of
    // Qt*r are needed.
    Darray Qt_r( n_, 0.0 );
    newResidual = residual_;
    for (dsize in = 0; in != n_; in++) {
      double matElt = jacobian_[in + in * m_];
      DBGPRINT("DEBUG: Element %lu = %g\n", in+1, matElt);
      if (matElt != 0.0) {
        double sum = 0.0;
        for (dsize i = in; i < m_; i++)
          sum += jacobian_[i + in *  m_] * newResidual[i];
        double temp = -sum / matElt;
        for (dsize i = in; i < m_; i++)
          newResidual[i] += jacobian_[i + in * m_] * temp;
      }
      jacobian_[in + in * m_] = Rdiag[in]; 
      DBGPRINT("\tRdiag[%lu]= %g\n", in+1, jacobian_[in + in * m_]);
      Qt_r[in] = newResidual[in];
      DBGPRINT("DEBUG: qtf[%lu]= %g\n", in+1, Qt_r[in]);
    }

    // Compute the norm of the scaled gradient.
    double gnorm = 0.0;
    if ( rnorm > 0.0 ) {
      for (dsize in = 0; in != n_; in++) {
        int l = Jpvt[ in ];
        if (JcolNorm_[in] != 0.0) {
          double sum = 0.0;
          for (dsize i2 = 0; i2 <= in; i2++) {
            sum += jacobian_[i2 + in * m_] * (Qt_r[i2] / rnorm);
            DBGPRINT("DEBUG: jacobian[%lu, %lu]= %g\n", i2+1, in+1, jacobian_[i2 + in * m_]);
          }
          // Determine max
          double d1 = fabs( sum / JcolNorm_[l] );
          gnorm = std::max( gnorm, d1 );
        }
      }
    }
    DBGPRINT("gnorm= %g\n", gnorm);
    // Test for convergence of gradient norm.
    if (gnorm <= gtol) {
      DBGPRINT("Gradient norm %g is less than gtol %g, iteration %i\n",
             gnorm, gtol, currentIt+1);
      info = 4;
      break;
    }

    // Rescale if necessary
    for (dsize in = 0; in != n_; in++)
      diag[in] = std::max( diag[in], JcolNorm_[in] );
    PrintVector("RescaleDiag", diag);
    double Ratio = 0.0;
    while (Ratio < 0.0001) {
      // -----------------------------------------
      // | LMPAR BEGIN
      // -----------------------------------------
      // NOTE: Adapted from lmpar_ in lmdif.c from Grace 5.1.22
      DBGPRINT("\nLMPAR ITERATION %i\n", currentIt+1);
      // Determine the Levenberg-Marquardt parameter.
      Darray Xvec(n_, 0.0);  // Will contain solution to A*x=b, sqrt(par)*D*x=0
      // Compute and store in Xvec the Gauss-Newton direction.
      // If the Jacobian is rank-deficient, obtain a least-squares solution.
      dsize rank = n_;
      for (dsize in = 0; in != n_; in++) {
        work1[in] = Qt_r[in];
        DBGPRINT("jac[%lu,%lu]= %12.6g  rank= %4lu", in+1, in+1, jacobian_[in + in * m_], rank);
        if (jacobian_[in + in * m_] == 0.0 && rank == n_)
          rank = in;
        if (rank < n_)
          work1[in] = 0.0;
        DBGPRINT("   wa1= %12.6g\n", work1[in]);
      }
      DBGPRINT("Final rank= %lu\n", rank);
      // Subtract 1 from rank to use as an index.
      long int rm1 = rank - 1;
      if (rm1 >= 0) {
        for (long int k = 0; k <= rm1; k++) {
          long int in = rm1 - k;
          work1[in] /= jacobian_[in + in * (long int)m_];
          DBGPRINT("wa1[%li] /= %g\n", in+1, jacobian_[in + in * (long int)m_]);
          long int in1 = in - 1;
          if (in1 > -1) {
            double temp = work1[in];
            for (long int i = 0; i <= in1; i++) {
              work1[i] -= jacobian_[i + in * (long int)m_] * temp;
              DBGPRINT("  wa1[%li] -= %g\n", i+1, jacobian_[i + in * (long int)m_]);
            }
          }
        }
      }
      PrintVector("work1", work1);
      for (dsize in = 0; in != n_; in++)
        Xvec[ Jpvt[in] ] = work1[in];
  
      // Evaluate the function at the origin, and test for acceptance of
      // the Gauss-Newton direction.
      for (dsize in = 0; in != n_; in++) {
        DBGPRINT("work2[%lu] = %g * %g\n", in+1, diag[in], Xvec[in]);
        work2[in] = diag[in] * Xvec[in];
      }
      double dxnorm = VecNorm( work2 );
      DBGPRINT("dxnorm= %g\n", dxnorm);
      double fp = dxnorm - delta;
      // Initialize counter for searching for LM parameter
      int lmIterations = 0;
      if (fp > 0.1 * delta) {
        // If the Jacobian is not rank deficient, the Newton step provdes a lower
        // bound, parl, for the zero of the function. Otherwise set this bound to
        // zero
        double parl = 0.0;
        if ( rank >= n_ ) {
          for (dsize in = 0; in != n_; in++) {
            int idx = Jpvt[in];
            work1[in] = diag[idx] * (work2[idx] / dxnorm);
          }
          for (dsize in = 0; in != n_; in++) {
            double sum = 0.0;
            long int in1 = (long int)in - 1;
            if (in1 > -1) {
              for (long int i = 0; i <= in1; i++)
                sum += jacobian_[i + in * m_] * work1[i];
            }
            work1[in] = (work1[in] - sum) / jacobian_[in + in * m_];
          }
          double temp = VecNorm(work1);
          parl = fp / delta / temp / temp;
        }
        DBGPRINT("parl= %g\n",parl);
  
        // Calculate an upper bound, paru, for the zero of the function
        for (dsize in = 0; in != n_; in++) {
          double sum = 0.0;
          for (dsize i = 0; i <= in; i++)
            sum += jacobian_[i + in * m_] * Qt_r[i];
          work1[in] = sum / diag[ Jpvt[in] ];
          DBGPRINT("paru work1[%lu]= %g\n", in+1, work1[in]);
        }
        double w1norm = VecNorm( work1 );
        double paru = w1norm / delta;
        DBGPRINT("paru = %g = %g / %g\n", paru, w1norm, delta);
        if (paru == 0.0)
          paru = dwarf / std::min( delta, 0.1 );
        DBGPRINT("paru= %g\n", paru);
        
        // If the current L-M parameter lies outside of the interval (parl,paru),
        // set par to the closer endpoint.
        LM_par = std::max( LM_par, parl );
        LM_par = std::min( LM_par, paru );
        if (LM_par == 0.0)
          LM_par = w1norm / dxnorm;
  
        // Iteration start.
        bool lmLoop = true;
        while (lmLoop) {
          ++lmIterations;
          DBGPRINT("\t[ lmLoop %i  parl= %g  paru= %g   LM_par= %g ]\n",
                 lmIterations, parl, paru, LM_par);
          // Evaluate the function at the current value of LM_par
          if (LM_par == 0.0)
            LM_par = std::max( dwarf, 0.001 * paru );
          double temp = sqrt( LM_par );
          for (dsize in = 0; in != n_; in++) {
            work1[in] = temp * diag[in];
            DBGPRINT("\tLMPAR work1[%lu]= %g = %g * %g\n", in+1, work1[in], temp, diag[in]);
          }
  
          // -----------------------------------------------
          // | BEGIN QRSOLV
          // -----------------------------------------------
          // NOTE: Adapted from qrsolv_ in lmdif.c from Grace 5.1.22 
          DBGPRINT("\nQRSOLV ITERATION %i\n", lmIterations);
          // This array of length n will hold the diagonal elements of the
          // upper triangular matrix s.
          Darray Sdiag(n_, 0.0);

          // Copy R and Qt_r to preserve input and initialize s.
          for (dsize in = 0; in != n_; in++) {
            for (dsize i = in; i != n_; i++)
              jacobian_[i + in * m_] = jacobian_[in + i * m_];
            Xvec[in] = jacobian_[in + in * m_];
            work2[in] = Qt_r[in];
          }

          // Eliminate the diagonal matrix D using a Givens rotation
          for (dsize in = 0; in != n_; in++) {
            // Prepare the row of D to be eliminated, locating the diagonal 
            // element using p from the QR factorization.
            double diagL = work1[ Jpvt[in] ];
            DBGPRINT("diag[%i] = %g\n", Jpvt[in]+1, diagL);
            if (diagL != 0.0) {
              for (dsize k = in; k < n_; k++)
                Sdiag[k] = 0.0;
              Sdiag[in] = diagL;
              // The transformations to eliminate the row of D modify only
              // a single element of Qt_r beyond the first n, which is 
              // initially zero.
              double qtbpj = 0.0;
              for (dsize k = in; k < n_; k++) {
                // Determine a Givens rotation which eliminates the appropriate
                // element in the current row of D.
                if (Sdiag[k] != 0.0) {
                  double d1 = jacobian_[k + k * m_];
                  double cos, sin;
                  if (fabs(d1) < fabs(Sdiag[k])) {
                    double cotan = jacobian_[k + k * m_] / Sdiag[k];
                    sin = 0.5 / sqrt(0.25 + 0.25 * (cotan * cotan));
                    cos = sin * cotan;
                  } else {
                    double tan = Sdiag[k] / jacobian_[k + k *m_];
                    cos = 0.5 / sqrt(0.25 + 0.25 * (tan * tan));
                    sin = cos * tan;
                  }
                  DBGPRINT("DBG QRsolv: %lu, %lu: cos= %12.6g  sin= %12.6g\n",
                         in+1, k+1, cos, sin);
                  // Compute the modified diagonal element of R and the 
                  // modified element of (Qt_r, 0)
                  jacobian_[k + k * m_] = cos * jacobian_[k + k * m_] + sin * Sdiag[k];
                  double temp = cos * work2[k] + sin * qtbpj; 
                  qtbpj = -sin * work2[k] + cos * qtbpj;
                  work2[k] = temp;
                  DBGPRINT("\twork2[%lu]= %g\n", k+1, work2[k]);
                  // Accumulate the transformation in the row of S
                  dsize kp1 = k + 1;
                  if (kp1 <= n_) {
                    for (dsize i = kp1; i < n_; i++) {
                      temp = cos * jacobian_[i + k * m_] + sin * Sdiag[i];
                      Sdiag[i] = -sin * jacobian_[i + k * m_] + cos * Sdiag[i];
                      jacobian_[i + k * m_] = temp;
                    }
                  }
                }
              }
            }
            // Store the diagonal element of s and restore the corresponding
            // diagonal element of r
            Sdiag[in] = jacobian_[in + in * m_];
            DBGPRINT("QRsolv sdiag[%lu]= %g\n", in+1, Sdiag[in]);
            jacobian_[in + in * m_] = Xvec[in];
          }
  
          // Solve the triangular system for z. if the system is singular,
          // then obtain a least-squares solution.
          dsize u_nsing = n_;
          for (dsize in = 0; in != n_; in++) {
            if (Sdiag[in] == 0.0 && u_nsing == n_)
              u_nsing = in;
            if (u_nsing < n_)
              work2[in] = 0.0;
          }
          DBGPRINT("nsing= %lu\n", u_nsing);
          // Subtract 1 from nsing to use as an index.
          long int nsing = (long int)u_nsing - 1;
          if (nsing >= 0) {
            for (long int k = 0; k <= nsing; k++) {
              long int in = nsing - k;
              double sum = 0.0;
              long int in1 = in + 1;
              if (in1 <= nsing) {
                for (long int i = in1; i <= nsing; i++)
                  sum += jacobian_[i + in * (long int)m_] * work2[i];
              }
              work2[in] = (work2[in] - sum) / Sdiag[in];
            }
          }

          // Permute the components of z back to components of x.
          for (dsize in = 0; in != n_; in++)
            Xvec[ Jpvt[in] ] = work2[in];

          PrintVector("QRsolv Xvec", Xvec);
          PrintVector("sdiag", Sdiag);
          // -----------------------------------------------
          // | END QRSOLV
          // -----------------------------------------------

          for (dsize in = 0; in != n_; in++) {
            work2[in] = diag[in] * Xvec[in];
            DBGPRINT("\twork2[%lu]= %g = %g * %g\n", in+1, work2[in], diag[in], Xvec[in]);
          }
  
          dxnorm = VecNorm( work2 );
          temp = fp;
          fp = dxnorm - delta;
          DBGPRINT("DBG LMPAR: fp = %g = %g - %g\n", fp, dxnorm, delta); 
          // If the function is small enough, accept the current value of LM_par.
          // Also test for the exceptional cases where parl is zero of the number
          // of iterations has reached 10.
          if (fabs(fp) <= 0.1 * delta ||
              (parl == 0.0 && fp <= temp && temp < 0.0) ||
              lmIterations == 10)
          {
            lmLoop = false;
            break;
          }
  
          // Compute the Newton correction.
          for (dsize in = 0; in != n_; in++) {
            int idx = Jpvt[ in ];
            work1[in] = diag[idx] * (work2[idx] / dxnorm);
          }
          for (dsize in = 0; in != n_; in++) {
            work1[in] /= Sdiag[in];
            temp = work1[in];
            dsize in1 = in + 1;
            if (in1 <= n_) {
              for (dsize i = in1; i < n_; i++)
                work1[i] -= jacobian_[i + in * m_] * temp;
            }
          }
          temp = VecNorm( work1 );
          double parc = fp / delta / temp / temp;
          DBGPRINT("parc= %g\n", parc);
          
          // Depending on the sign of the function, update parl or paru
          if (fp > 0.0)
            parl = std::max( parl, LM_par );
          if (fp < 0.0)
            paru = std::min( paru, LM_par );

          // Compute an improved estimate for the parameter
          LM_par = std::max( parl, LM_par + parc );
        }
      }
      DBGPRINT("END LMPAR iter= %i  LM_par= %g\n", lmIterations, LM_par); 
      if (lmIterations == 0)
        LM_par = 0.0;
      // -------------------------------------------
      // | LMPAR END
      // -------------------------------------------
      DBGPRINT("DEBUG: LM_par is %g\n", LM_par);
      // Store the direction Xvec and Param + Xvec. Calculate norm of Param.
      PrintVector("DEBUG: hvec", Xvec);
      for (dsize in = 0; in != n_; in++) {
        Xvec[in] = -Xvec[in];
        work1[in] = Params_[in] + Xvec[in];
        work2[in] = diag[in] * Xvec[in];
      }
      double pnorm = VecNorm( work2 );
      DBGPRINT("pnorm= %g\n", pnorm);

      // On first iteration, adjust initial step bound
      if (currentIt == 0)
        delta = std::min( delta, pnorm );
      DBGPRINT("Delta is now %g\n", delta);

      // Evaluate function at Param + Xvec and calculate its norm
      EvaluateFxn( Xvals_, Yvals_, work1, newResidual );
      ++nfev;
      double rnorm1 = VecNorm( newResidual );
      DBGPRINT("rnorm1= %g\n", rnorm1);

      // Compute the scaled actual reduction
      double actual_reduction = -1.0;
      if ( 0.1 * rnorm1 < rnorm) {
        double d1 = rnorm1 / rnorm;
        actual_reduction = 1.0 - d1 * d1;
      }
      DBGPRINT("actualReduction= %g\n", actual_reduction);

      // Compute the scaled predicted reduction and the scaled directional
      // derivative.
      for (dsize in = 0; in != n_; in++)
      {
        work2[in] = 0.0;
        double temp = Xvec[ Jpvt[in] ];
        for (dsize i = 0; i <= in; i++)
          work2[i] += jacobian_[i + in * m_] * temp;
      }
      double temp1 = VecNorm( work2 ) / rnorm;
      double temp2 = sqrt(LM_par) * pnorm / rnorm;
      DBGPRINT("temp1= %g    temp2= %g\n", temp1, temp2);

      double predicted_reduction = temp1 * temp1 + temp2 * temp2 / 0.5;
      double dirder = -(temp1 * temp1 + temp2 * temp2);
      DBGPRINT("predictedReduction = %12.6g    dirder= %12.6g\n", predicted_reduction, dirder);

      // Compute the ratio of the actial to the predicted reduction.
      if (predicted_reduction != 0.0)
        Ratio = actual_reduction / predicted_reduction;
      DBGPRINT("ratio= %g\n", Ratio);

      // Update the step bound
      if (Ratio <= 0.25) {
        DBGPRINT("Ratio <= 0.25\n");
        double temp;
        if (actual_reduction < 0.0)
          temp = 0.5 * dirder / (dirder + 0.5 * actual_reduction);
        else
          temp = 0.5;
        if ( 0.1 * rnorm1 >= rnorm || temp < 0.1 )
          temp = 0.1;
        DBGPRINT("delta = %g * min( %g, %g )\n", temp, delta, pnorm / 0.1); 
        delta = temp * std::min( delta, pnorm / 0.1 );
        LM_par /= temp;
      } else {
        if (LM_par == 0.0 || Ratio >= 0.75) {
          DBGPRINT("Ratio > 0.25 and (LMpar is zero or Ratio >= 0.75\n");
          delta = pnorm / 0.5;
          LM_par *= 0.5;
        }
      }
      DBGPRINT("LM_par= %g    Delta= %g\n", LM_par, delta);

      if (Ratio >= 0.0001) { 
        // Successful iteration. Update Param, residual, and their norms.
        for (dsize in = 0; in != n_; in++) {
          Params_[in] = work1[in];
          work1[in] = diag[in] * Params_[in];
        }
        for (dsize im = 0; im < m_; im++)
          residual_[im] = newResidual[im];
        xnorm = VecNorm( work1 );
        rnorm = rnorm1;
      }
      
      // Tests for convergence
      if (fabs(actual_reduction) <= tolerance &&
          predicted_reduction <= tolerance &&
          0.5 * Ratio <= 1.0)
        info = 1;
      if (delta <= xtol * xnorm)
        info = 2;  
      if (fabs(actual_reduction) <= tolerance &&
          predicted_reduction <= tolerance &&
          0.5 * Ratio <= 1.0 &&
          info == 2)
        info = 3;
      if (info != 0) break;
                 
      // Tests for stringent tolerance
      if (nfev >= maxfev)
        info = 5;
      if (fabs(actual_reduction) <= machine_epsilon &&
          predicted_reduction <= machine_epsilon &&
          0.5 * Ratio <= 1.0)
        info = 6;
      if (delta <= machine_epsilon * xnorm)
        info = 7;
      if (gnorm <= machine_epsilon)
        info = 8;
      if (info != 0) break;

    } // END inner loop
    if (info != 0) break;
    currentIt++;
  }
  // Final parameters and Y at final parameters
  Params_to_Pvec(ParamVec, Params_);
  fxn_(Xvals_, ParamVec, finalY_);
# ifdef DBG_CURVEFIT
  DBGPRINT("%s\n", Message(info));
  DBGPRINT("Exiting with info value = %i\n", info);
  for (dsize in = 0; in != n_; in++)
    DBGPRINT("\tParams[%lu]= %g\n", in, ParamVec[in]);
# endif
  return info;
}