Esempio n. 1
0
/* blocked multiplication of the transpose of the nxm matrix a with itself (i.e. a^T a)
 * using a block size of bsize. The product is returned in b.
 * Since a^T a is symmetric, its computation can be speeded up by computing only its
 * upper triangular part and copying it to the lower part.
 *
 * More details on blocking can be found at 
 * http://www-2.cs.cmu.edu/afs/cs/academic/class/15213-f02/www/R07/section_a/Recitation07-SectionA.pdf
 */
void TRANS_MAT_MAT_MULT(LM_REAL *a, LM_REAL *b, int n, int m)
{
#ifdef HAVE_LAPACK /* use BLAS matrix multiply */

LM_REAL alpha=CNST(1.0), beta=CNST(0.0);
  /* Fool BLAS to compute a^T*a avoiding transposing a: a is equivalent to a^T in column major,
   * therefore BLAS computes a*a^T with a and a*a^T in column major, which is equivalent to
   * computing a^T*a in row major!
   */
  GEMM("N", "T", &m, &m, &n, &alpha, a, &m, a, &m, &beta, b, &m);

#else /* no LAPACK, use blocking-based multiply */

register int i, j, k, jj, kk;
register LM_REAL sum, *bim, *akm;
const int bsize=__BLOCKSZ__;

#define __MIN__(x, y) (((x)<=(y))? (x) : (y))
#define __MAX__(x, y) (((x)>=(y))? (x) : (y))

  /* compute upper triangular part using blocking */
  for(jj=0; jj<m; jj+=bsize){
    for(i=0; i<m; ++i){
      bim=b+i*m;
      for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j)
        bim[j]=0.0; //b[i*m+j]=0.0;
    }

    for(kk=0; kk<n; kk+=bsize){
      for(i=0; i<m; ++i){
        bim=b+i*m;
        for(j=__MAX__(jj, i); j<__MIN__(jj+bsize, m); ++j){
          sum=0.0;
          for(k=kk; k<__MIN__(kk+bsize, n); ++k){
            akm=a+k*m;
            sum+=akm[i]*akm[j]; //a[k*m+i]*a[k*m+j];
          }
          bim[j]+=sum; //b[i*m+j]+=sum;
        }
      }
    }
  }

  /* copy upper triangular part to the lower one */
  for(i=0; i<m; ++i)
    for(j=0; j<i; ++j)
      b[i*m+j]=b[j*m+i];

#undef __MIN__
#undef __MAX__

#endif /* HAVE_LAPACK */
}
Esempio n. 2
0
/* central finite difference approximation to the jacobian of func */
void FDIF_CENT_JAC_APPROX(
    void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
													   /* function to differentiate */
    LM_REAL *p,              /* I: current parameter estimate, mx1 */
    LM_REAL *hxm,            /* W/O: work array for evaluating func(p-delta), nx1 */
    LM_REAL *hxp,            /* W/O: work array for evaluating func(p+delta), nx1 */
    LM_REAL delta,           /* increment for computing the jacobian */
    LM_REAL *jac,            /* O: array for storing approximated jacobian, nxm */
    int m,
    int n,
    void *adata)
{
register int i, j;
LM_REAL tmp;
register LM_REAL d;

  for(j=0; j<m; ++j){
    /* determine d=max(1E-04*|p[j]|, delta), see HZ */
    d=CNST(1E-04)*p[j]; // force evaluation
    d=FABS(d);
    if(d<delta)
      d=delta;

    tmp=p[j];
    p[j]-=d;
    (*func)(p, hxm, m, n, adata);

    p[j]=tmp+d;
    (*func)(p, hxp, m, n, adata);
    p[j]=tmp; /* restore */

    d=CNST(0.5)/d; /* invert so that divisions can be carried out faster as multiplications */
    for(i=0; i<n; ++i){
      jac[i*m+j]=(hxp[i]-hxm[i])*d;
    }
  }
}
Esempio n. 3
0
int LEVMAR_DER(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in  R^n */
  void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),  /* function to evaluate the jacobian \part x / \part p */ 
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[4],    /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
                       * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used
                       */
  LM_REAL info[LM_INFO_SZ],
					           /* O: information regarding the minimization. Set to NULL if don't care
                      * info[0]= ||e||_2 at initial p.
                      * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                      * info[5]= # iterations,
                      * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
                      *                                 2 - stopped by small Dp
                      *                                 3 - stopped by itmax
                      *                                 4 - singular matrix. Restart from current p with increased mu 
                      *                                 5 - no further error reduction is possible. Restart with increased mu
                      *                                 6 - stopped by small ||e||_2
                      * info[7]= # function evaluations
                      * info[8]= # jacobian evaluations
                      */
  LM_REAL *work,     /* working memory, allocate if NULL */
  LM_REAL *covar,    /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)       /* pointer to possibly additional data, passed uninterpreted to func & jacf.
                      * Set to NULL if not needed
                      */
{
register int i, j, k, l;
int worksz, freework=0, issolved;
/* temp work arrays */
LM_REAL *e,          /* nx1 */
       *hx,         /* \hat{x}_i, nx1 */
       *jacTe,      /* J^T e_i mx1 */
       *jac,        /* nxm */
       *jacTjac,    /* mxm */
       *Dp,         /* mx1 */
   *diag_jacTjac,   /* diagonal of J^T J, mx1 */
       *pDp;        /* p + Dp, mx1 */

register LM_REAL mu,  /* damping constant */
                tmp; /* mainly used in matrix & vector multiplications */
LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
LM_REAL tau, eps1, eps2, eps2_sq, eps3;
LM_REAL init_p_eL2;
int nu=2, nu2, stop, nfev, njev=0;
const int nm=n*m;

  mu=jacTe_inf=0.0; /* -Wall */

  if(n<m){
    fprintf(stderr, LCAT(LEVMAR_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
    return -1;
  }

  if(!jacf){
    fprintf(stderr, RCAT("No function specified for computing the jacobian in ", LEVMAR_DER)
        RCAT("().\nIf no such function is available, use ", LEVMAR_DIF) RCAT("() rather than ", LEVMAR_DER) "()\n");
    return -1;
  }

  if(opts){
	  tau=opts[0];
	  eps1=opts[1];
	  eps2=opts[2];
	  eps2_sq=opts[2]*opts[2];
    eps3=opts[3];
  }
  else{ // use default values
	  tau=CNST(LM_INIT_MU);
	  eps1=CNST(LM_STOP_THRESH);
	  eps2=CNST(LM_STOP_THRESH);
	  eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH);
    eps3=CNST(LM_STOP_THRESH);
  }

  if(!work){
    worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_DER, "(): memory allocation request failed\n"));
      return -1;
    }
    freework=1;
  }

  /* set up work arrays */
  e=work;
  hx=e + n;
  jacTe=hx + n;
  jac=jacTe + m;
  jacTjac=jac + nm;
  Dp=jacTjac + m*m;
  diag_jacTjac=Dp + m;
  pDp=diag_jacTjac + m;

  /* compute e=x - f(p) and its L2 norm */
  (*func)(p, hx, m, n, adata); nfev=1;
  for(i=0, p_eL2=0.0; i<n; ++i){
    e[i]=tmp=x[i]-hx[i];
    p_eL2+=tmp*tmp;
  }
  init_p_eL2=p_eL2;

  for(k=stop=0; k<itmax && !stop; ++k){
    /* Note that p and e have been updated at a previous iteration */

    if(p_eL2<=eps3){ /* error is small */
      stop=6;
      break;
    }

    /* Compute the jacobian J at p,  J^T J,  J^T e,  ||J^T e||_inf and ||p||^2.
     * Since J^T J is symmetric, its computation can be speeded up by computing
     * only its upper triangular part and copying it to the lower part
     */

    (*jacf)(p, jac, m, n, adata); ++njev;

    /* J^T J, J^T e */
    if(nm<__BLOCKSZ__SQ){ // this is a small problem
      /* This is the straightforward way to compute J^T J, J^T e. However, due to
       * its noncontinuous memory access pattern, it incures many cache misses when
       * applied to large minimization problems (i.e. problems involving a large
       * number of free variables and measurements), in which J is too large to
       * fit in the L1 cache. For such problems, a cache-efficient blocking scheme
       * is preferable.
       *
       * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
       * performance problem.
       *
       * On the other hand, the straightforward algorithm is faster on small
       * problems since in this case it avoids the overheads of blocking. 
       */

      for(i=0; i<m; ++i){
        for(j=i; j<m; ++j){
          int lm;

          for(l=0, tmp=0.0; l<n; ++l){
            lm=l*m;
            tmp+=jac[lm+i]*jac[lm+j];
          }

		      /* store tmp in the corresponding upper and lower part elements */
          jacTjac[i*m+j]=jacTjac[j*m+i]=tmp;
        }

        /* J^T e */
        for(l=0, tmp=0.0; l<n; ++l)
          tmp+=jac[l*m+i]*e[l];
        jacTe[i]=tmp;
      }
    }
    else{ // this is a large problem
      /* Cache efficient computation of J^T J based on blocking
       */
      TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);

      /* cache efficient computation of J^T e */
      for(i=0; i<m; ++i)
        jacTe[i]=0.0;

      for(i=0; i<n; ++i){
        register LM_REAL *jacrow;

        for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
          jacTe[l]+=jacrow[l]*tmp;
      }
    }

	  /* Compute ||J^T e||_inf and ||p||^2 */
    for(i=0, p_L2=jacTe_inf=0.0; i<m; ++i){
      if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;

      diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
      p_L2+=p[i]*p[i];
    }
    //p_L2=sqrt(p_L2);

#if 0
if(!(k%10)){
  printf("Iter: %d, estimate: ", k);
  for(i=0; i<m; ++i)
    printf("%.9g ", p[i]);
  printf("-- errors %.9g %0.9g\n", jacTe_inf, p_eL2);
}
#endif

    /* check for convergence */
    if((jacTe_inf <= eps1)){
      Dp_L2=0.0; /* no increment for p in this case */
      stop=1;
      break;
    }

   /* compute initial damping factor */
    if(k==0){
      for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
        if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
      mu=tau*tmp;
    }

    /* determine increment using adaptive damping */
    while(1){
      /* augment normal equations */
      for(i=0; i<m; ++i)
        jacTjac[i*m+i]+=mu;

      /* solve augmented equations */
#ifdef HAVE_LAPACK
      /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD.
       * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate;
       * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed
       */

      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m);
      //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m);

#else
      /* use the LU included with levmar */
      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
#endif /* HAVE_LAPACK */

      if(issolved){
        /* compute p's new estimate and ||Dp||^2 */
        for(i=0, Dp_L2=0.0; i<m; ++i){
          pDp[i]=p[i] + (tmp=Dp[i]);
          Dp_L2+=tmp*tmp;
        }
        //Dp_L2=sqrt(Dp_L2);

        if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
        //if(Dp_L2<=eps2*(p_L2 + eps2)){ /* relative change in p is small, stop */
          stop=2;
          break;
        }

       if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */
       //if(Dp_L2>=(p_L2+eps2)/CNST(EPSILON)){ /* almost singular */
         stop=4;
         break;
       }

        (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
        for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
          hx[i]=tmp=x[i]-hx[i];
          pDp_eL2+=tmp*tmp;
        }

        for(i=0, dL=0.0; i<m; ++i)
          dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);

        dF=p_eL2-pDp_eL2;

        if(dL>0.0 && dF>0.0){ /* reduction in error, increment is accepted */
          tmp=(CNST(2.0)*dF/dL-CNST(1.0));
          tmp=CNST(1.0)-tmp*tmp*tmp;
          mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) );
          nu=2;

          for(i=0 ; i<m; ++i) /* update p's estimate */
            p[i]=pDp[i];

          for(i=0; i<n; ++i) /* update e and ||e||_2 */
            e[i]=hx[i];
          p_eL2=pDp_eL2;
          break;
        }
      }

      /* if this point is reached, either the linear system could not be solved or
       * the error did not reduce; in any case, the increment must be rejected
       */

      mu*=nu;
      nu2=nu<<1; // 2*nu;
      if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
        stop=5;
        break;
      }
      nu=nu2;

      for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
        jacTjac[i*m+i]=diag_jacTjac[i];
    } /* inner loop */
  }

  if(k>=itmax) stop=3;

  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
    jacTjac[i*m+i]=diag_jacTjac[i];

  if(info){
    info[0]=init_p_eL2;
    info[1]=p_eL2;
    info[2]=jacTe_inf;
    info[3]=Dp_L2;
    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
    info[4]=mu/tmp;
    info[5]=(LM_REAL)k;
    info[6]=(LM_REAL)stop;
    info[7]=(LM_REAL)nfev;
    info[8]=(LM_REAL)njev;
  }

  /* covariance matrix */
  if(covar){
    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
  }

  if(freework) free(work);

  return (stop!=4)?  k : -1;
}
Esempio n. 4
0
static void
LNSRCH(int m, LM_REAL *x, LM_REAL f, LM_REAL *g, LM_REAL *p, LM_REAL alpha, LM_REAL *xpls,
       LM_REAL *ffpls, void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), struct FUNC_STATE state,
       int *mxtake, int *iretcd, LM_REAL stepmx, LM_REAL steptl, LM_REAL *sx)
{
/* Find a next newton iterate by backtracking line search.
 * Specifically, finds a \lambda such that for a fixed alpha<0.5 (usually 1e-4),
 * f(x + \lambda*p) <= f(x) + alpha * \lambda * g^T*p
 *
 * Translated (with minor changes) from Schnabel, Koontz & Weiss uncmin.f,  v1.3

 * PARAMETERS :

 *	m       --> dimension of problem (i.e. number of variables)
 *	x(m)    --> old iterate:	x[k-1]
 *	f       --> function value at old iterate, f(x)
 *	g(m)    --> gradient at old iterate, g(x), or approximate
 *	p(m)    --> non-zero newton step
 *	alpha   --> fixed constant < 0.5 for line search (see above)
 *	xpls(m) <--	 new iterate x[k]
 *	ffpls   <--	 function value at new iterate, f(xpls)
 *	func    --> name of subroutine to evaluate function
 *	state   <--> information other than x and m that func requires.
 *			    state is not modified in xlnsrch (but can be modified by func).
 *	iretcd  <--	 return code
 *	mxtake  <--	 boolean flag indicating step of maximum length used
 *	stepmx  --> maximum allowable step size
 *	steptl  --> relative step size at which successive iterates
 *			    considered close enough to terminate algorithm
 *	sx(m)	  --> diagonal scaling matrix for x, can be NULL

 *	internal variables

 *	sln		 newton length
 *	rln		 relative length of newton step
*/

    register int i;
    int firstback = 1;
    LM_REAL disc;
    LM_REAL a3, b;
    LM_REAL t1, t2, t3, lambda, tlmbda, rmnlmb;
    LM_REAL scl, rln, sln, slp;
    LM_REAL tmp1, tmp2;
    LM_REAL fpls, pfpls = 0., plmbda = 0.; /* -Wall */

    f*=CNST(0.5);
    *mxtake = 0;
    *iretcd = 2;
    tmp1 = 0.;
    if(!sx) /* no scaling */
      for (i = 0; i < m; ++i)
        tmp1 += p[i] * p[i];
    else
      for (i = 0; i < m; ++i)
        tmp1 += sx[i] * sx[i] * p[i] * p[i];
    sln = (LM_REAL)sqrt(tmp1);
    if (sln > stepmx) {
	  /*	newton step longer than maximum allowed */
	    scl = stepmx / sln;
      for(i=0; i<m; ++i) /* p * scl */
        p[i]*=scl;
	    sln = stepmx;
    }
    for(i=0, slp=0.; i<m; ++i) /* g^T * p */
      slp+=g[i]*p[i];
    rln = 0.;
    if(!sx) /* no scaling */
      for (i = 0; i < m; ++i) {
	      tmp1 = (FABS(x[i])>=CNST(1.))? FABS(x[i]) : CNST(1.);
	      tmp2 = FABS(p[i])/tmp1;
	      if(rln < tmp2) rln = tmp2;
      }
    else
      for (i = 0; i < m; ++i) {
	      tmp1 = (FABS(x[i])>=CNST(1.)/sx[i])? FABS(x[i]) : CNST(1.)/sx[i];
	      tmp2 = FABS(p[i])/tmp1;
	      if(rln < tmp2) rln = tmp2;
      }
    rmnlmb = steptl / rln;
    lambda = CNST(1.0);

    /*	check if new iterate satisfactory.  generate new lambda if necessary. */

    while(*iretcd > 1) {
	    for (i = 0; i < m; ++i)
	      xpls[i] = x[i] + lambda * p[i];

      /* evaluate function at new point */
      (*func)(xpls, state.hx, m, state.n, state.adata);
      for(i=0, tmp1=0.0; i<state.n; ++i){
        state.hx[i]=tmp2=state.x[i]-state.hx[i];
        tmp1+=tmp2*tmp2;
      }
      fpls=CNST(0.5)*tmp1; *ffpls=tmp1; ++(*(state.nfev));

	    if (fpls <= f + slp * alpha * lambda) { /* solution found */
	      *iretcd = 0;
	      if (lambda == CNST(1.) && sln > stepmx * CNST(.99)) *mxtake = 1;
	      return;
	    }

	    /* else : solution not (yet) found */

      /* First find a point with a finite value */

	    if (lambda < rmnlmb) {
	      /* no satisfactory xpls found sufficiently distinct from x */

	      *iretcd = 1;
	      return;
	    }
	    else { /*	calculate new lambda */

	      /* modifications to cover non-finite values */
	      if (fpls >= LM_REAL_MAX) {
		      lambda *= CNST(0.1);
		      firstback = 1;
	      }
	      else {
		      if (firstback) { /*	first backtrack: quadratic fit */
		        tlmbda = -lambda * slp / ((fpls - f - slp) * CNST(2.));
		        firstback = 0;
		      }
		      else { /*	all subsequent backtracks: cubic fit */
		        t1 = fpls - f - lambda * slp;
		        t2 = pfpls - f - plmbda * slp;
		        t3 = CNST(1.) / (lambda - plmbda);
		        a3 = CNST(3.) * t3 * (t1 / (lambda * lambda)
				      - t2 / (plmbda * plmbda));
		        b = t3 * (t2 * lambda / (plmbda * plmbda)
			          - t1 * plmbda / (lambda * lambda));
		        disc = b * b - a3 * slp;
		        if (disc > b * b)
			    /* only one positive critical point, must be minimum */
			        tlmbda = (-b + ((a3 < 0)? -(LM_REAL)sqrt(disc): (LM_REAL)sqrt(disc))) /a3;
		        else
			    /* both critical points positive, first is minimum */
			        tlmbda = (-b + ((a3 < 0)? (LM_REAL)sqrt(disc): -(LM_REAL)sqrt(disc))) /a3;

		        if (tlmbda > lambda * CNST(.5))
			        tlmbda = lambda * CNST(.5);
		    }
		    plmbda = lambda;
		    pfpls = fpls;
		    if (tlmbda < lambda * CNST(.1))
		      lambda *= CNST(.1);
		    else
		      lambda = tlmbda;
      }
	  }
  }
} /* LNSRCH */
Esempio n. 5
0
int LEVMAR_BC_DER(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in  R^n */
  void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),  /* function to evaluate the jacobian \part x / \part p */ 
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  LM_REAL *lb,        /* I: vector of lower bounds. If NULL, no lower bounds apply */
  LM_REAL *ub,        /* I: vector of upper bounds. If NULL, no upper bounds apply */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[4],    /* I: minim. options [\mu, \epsilon1, \epsilon2, \epsilon3]. Respectively the scale factor for initial \mu,
                       * stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2. Set to NULL for defaults to be used.
                       * Note that ||J^T e||_inf is computed on free (not equal to lb[i] or ub[i]) variables only.
                       */
  LM_REAL info[LM_INFO_SZ],
					           /* O: information regarding the minimization. Set to NULL if don't care
                      * info[0]= ||e||_2 at initial p.
                      * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                      * info[5]= # iterations,
                      * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
                      *                                 2 - stopped by small Dp
                      *                                 3 - stopped by itmax
                      *                                 4 - singular matrix. Restart from current p with increased mu 
                      *                                 5 - no further error reduction is possible. Restart with increased mu
                      *                                 6 - stopped by small ||e||_2
                      * info[7]= # function evaluations
                      * info[8]= # jacobian evaluations
                      */
  LM_REAL *work,     /* working memory, allocate if NULL */
  LM_REAL *covar,    /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)       /* pointer to possibly additional data, passed uninterpreted to func & jacf.
                      * Set to NULL if not needed
                      */
{
register int i, j, k, l;
int worksz, freework=0, issolved;
/* temp work arrays */
LM_REAL *e,          /* nx1 */
       *hx,         /* \hat{x}_i, nx1 */
       *jacTe,      /* J^T e_i mx1 */
       *jac,        /* nxm */
       *jacTjac,    /* mxm */
       *Dp,         /* mx1 */
   *diag_jacTjac,   /* diagonal of J^T J, mx1 */
       *pDp;        /* p + Dp, mx1 */

register LM_REAL mu,  /* damping constant */
                tmp; /* mainly used in matrix & vector multiplications */
LM_REAL p_eL2, jacTe_inf, pDp_eL2; /* ||e(p)||_2, ||J^T e||_inf, ||e(p+Dp)||_2 */
LM_REAL p_L2, Dp_L2=LM_REAL_MAX, dF, dL;
LM_REAL tau, eps1, eps2, eps2_sq, eps3;
LM_REAL init_p_eL2;
int nu=2, nu2, stop, nfev, njev=0;
const int nm=n*m;

/* variables for constrained LM */
struct FUNC_STATE fstate;
LM_REAL alpha=CNST(1e-4), beta=CNST(0.9), gamma=CNST(0.99995), gamma_sq=gamma*gamma, rho=CNST(1e-8);
LM_REAL t, t0;
LM_REAL steptl=CNST(1e3)*(LM_REAL)sqrt(LM_REAL_EPSILON), jacTeDp;
LM_REAL tmin=CNST(1e-12), tming=CNST(1e-18); /* minimum step length for LS and PG steps */
const LM_REAL tini=CNST(1.0); /* initial step length for LS and PG steps */
int nLMsteps=0, nLSsteps=0, nPGsteps=0, gprevtaken=0;
int numactive;

  mu=jacTe_inf=t=0.0;  tmin=tmin; /* -Wall */

  if(n<m){
    fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): cannot solve a problem with fewer measurements [%d] than unknowns [%d]\n"), n, m);
    exit(1);
  }

  if(!jacf){
    fprintf(stderr, RCAT("No function specified for computing the jacobian in ", LEVMAR_BC_DER)
        RCAT("().\nIf no such function is available, use ", LEVMAR_BC_DIF) RCAT("() rather than ", LEVMAR_BC_DER) "()\n");
    exit(1);
  }

  if(!BOXCHECK(lb, ub, m)){
    fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): at least one lower bound exceeds the upper one\n"));
    exit(1);
  }

  if(opts){
	  tau=opts[0];
	  eps1=opts[1];
	  eps2=opts[2];
	  eps2_sq=opts[2]*opts[2];
	  eps3=opts[3];
  }
  else{ // use default values
	  tau=CNST(LM_INIT_MU);
	  eps1=CNST(LM_STOP_THRESH);
	  eps2=CNST(LM_STOP_THRESH);
	  eps2_sq=CNST(LM_STOP_THRESH)*CNST(LM_STOP_THRESH);
	  eps3=CNST(LM_STOP_THRESH);
  }

  if(!work){
    worksz=LM_DER_WORKSZ(m, n); //2*n+4*m + n*m + m*m;
    work=(LM_REAL *)malloc(worksz*sizeof(LM_REAL)); /* allocate a big chunk in one step */
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_BC_DER, "(): memory allocation request failed\n"));
      exit(1);
    }
    freework=1;
  }

  /* set up work arrays */
  e=work;
  hx=e + n;
  jacTe=hx + n;
  jac=jacTe + m;
  jacTjac=jac + nm;
  Dp=jacTjac + m*m;
  diag_jacTjac=Dp + m;
  pDp=diag_jacTjac + m;

  fstate.n=n;
  fstate.hx=hx;
  fstate.x=x;
  fstate.adata=adata;
  fstate.nfev=&nfev;
  
  /* see if starting point is within the feasile set */
  for(i=0; i<m; ++i)
    pDp[i]=p[i];
  BOXPROJECT(p, lb, ub, m); /* project to feasible set */
  for(i=0; i<m; ++i)
    if(pDp[i]!=p[i])
      fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_BC_DER) "()! [%g projected to %g]\n",
                      i, p[i], pDp[i]);

  /* compute e=x - f(p) and its L2 norm */
  (*func)(p, hx, m, n, adata); nfev=1;
  for(i=0, p_eL2=0.0; i<n; ++i){
    e[i]=tmp=x[i]-hx[i];
    p_eL2+=tmp*tmp;
  }
  init_p_eL2=p_eL2;

  for(k=stop=0; k<itmax && !stop; ++k){
 //printf("%d  %.15g\n", k, 0.5*p_eL2);
    /* Note that p and e have been updated at a previous iteration */

    if(p_eL2<=eps3){ /* error is small */
      stop=6;
      break;
    }

    /* Compute the jacobian J at p,  J^T J,  J^T e,  ||J^T e||_inf and ||p||^2.
     * Since J^T J is symmetric, its computation can be speeded up by computing
     * only its upper triangular part and copying it to the lower part
     */

    (*jacf)(p, jac, m, n, adata); ++njev;

    /* J^T J, J^T e */
    if(nm<__BLOCKSZ__SQ){ // this is a small problem
      /* This is the straightforward way to compute J^T J, J^T e. However, due to
       * its noncontinuous memory access pattern, it incures many cache misses when
       * applied to large minimization problems (i.e. problems involving a large
       * number of free variables and measurements), in which J is too large to
       * fit in the L1 cache. For such problems, a cache-efficient blocking scheme
       * is preferable.
       *
       * Thanks to John Nitao of Lawrence Livermore Lab for pointing out this
       * performance problem.
       *
       * On the other hand, the straightforward algorithm is faster on small
       * problems since in this case it avoids the overheads of blocking. 
       */

      for(i=0; i<m; ++i){
        for(j=i; j<m; ++j){
          int lm;

          for(l=0, tmp=0.0; l<n; ++l){
            lm=l*m;
            tmp+=jac[lm+i]*jac[lm+j];
          }

		      /* store tmp in the corresponding upper and lower part elements */
          jacTjac[i*m+j]=jacTjac[j*m+i]=tmp;
        }

        /* J^T e */
        for(l=0, tmp=0.0; l<n; ++l)
          tmp+=jac[l*m+i]*e[l];
        jacTe[i]=tmp;
      }
    }
    else{ // this is a large problem
      /* Cache efficient computation of J^T J based on blocking
       */
      TRANS_MAT_MAT_MULT(jac, jacTjac, n, m);

      /* cache efficient computation of J^T e */
      for(i=0; i<m; ++i)
        jacTe[i]=0.0;

      for(i=0; i<n; ++i){
        register LM_REAL *jacrow;

        for(l=0, jacrow=jac+i*m, tmp=e[i]; l<m; ++l)
          jacTe[l]+=jacrow[l]*tmp;
      }
    }

	  /* Compute ||J^T e||_inf and ||p||^2. Note that ||J^T e||_inf
     * is computed for free (i.e. inactive) variables only. 
     * At a local minimum, if p[i]==ub[i] then g[i]>0;
     * if p[i]==lb[i] g[i]<0; otherwise g[i]=0 
     */
    for(i=j=numactive=0, p_L2=jacTe_inf=0.0; i<m; ++i){
      if(ub && p[i]==ub[i]){ ++numactive; if(jacTe[i]>0.0) ++j; }
      else if(lb && p[i]==lb[i]){ ++numactive; if(jacTe[i]<0.0) ++j; }
      else if(jacTe_inf < (tmp=FABS(jacTe[i]))) jacTe_inf=tmp;

      diag_jacTjac[i]=jacTjac[i*m+i]; /* save diagonal entries so that augmentation can be later canceled */
      p_L2+=p[i]*p[i];
    }
    //p_L2=sqrt(p_L2);

#if 0
if(!(k%100)){
  printf("Current estimate: ");
  for(i=0; i<m; ++i)
    printf("%.9g ", p[i]);
  printf("-- errors %.9g %0.9g, #active %d [%d]\n", jacTe_inf, p_eL2, numactive, j);
}
#endif

    /* check for convergence */
    if(j==numactive && (jacTe_inf <= eps1)){
      Dp_L2=0.0; /* no increment for p in this case */
      stop=1;
      break;
    }

   /* compute initial damping factor */
    if(k==0){
      if(!lb && !ub){ /* no bounds */
        for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
          if(diag_jacTjac[i]>tmp) tmp=diag_jacTjac[i]; /* find max diagonal element */
        mu=tau*tmp;
      }
      else 
        mu=CNST(0.5)*tau*p_eL2; /* use Kanzow's starting mu */
    }

    /* determine increment using a combination of adaptive damping, line search and projected gradient search */
    while(1){
      /* augment normal equations */
      for(i=0; i<m; ++i)
        jacTjac[i*m+i]+=mu;

      /* solve augmented equations */
      /* 5 alternatives are available: LU, Cholesky, 2 variants of QR decomposition and SVD.
       * Cholesky is the fastest but might be inaccurate; QR is slower but more accurate;
       * SVD is the slowest but most accurate; LU offers a tradeoff between accuracy and speed
       */

      issolved=AX_EQ_B_LU(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_CHOL(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QR(jacTjac, jacTe, Dp, m);
      //issolved=AX_EQ_B_QRLS(jacTjac, jacTe, Dp, m, m);
      //issolved=AX_EQ_B_SVD(jacTjac, jacTe, Dp, m);

      if(issolved){
        for(i=0; i<m; ++i)
          pDp[i]=p[i] + Dp[i];

        /* compute p's new estimate and ||Dp||^2 */
        BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
        for(i=0, Dp_L2=0.0; i<m; ++i){
          Dp[i]=tmp=pDp[i]-p[i];
          Dp_L2+=tmp*tmp;
        }
        //Dp_L2=sqrt(Dp_L2);

        if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
          stop=2;
          break;
        }

       if(Dp_L2>=(p_L2+eps2)/(CNST(EPSILON)*CNST(EPSILON))){ /* almost singular */
         stop=4;
         break;
       }

        (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + Dp */
        for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
          hx[i]=tmp=x[i]-hx[i];
          pDp_eL2+=tmp*tmp;
        }

        if(pDp_eL2<=gamma_sq*p_eL2){
          for(i=0, dL=0.0; i<m; ++i)
            dL+=Dp[i]*(mu*Dp[i]+jacTe[i]);

#if 1
          if(dL>0.0){
            dF=p_eL2-pDp_eL2;
            tmp=(CNST(2.0)*dF/dL-CNST(1.0));
            tmp=CNST(1.0)-tmp*tmp*tmp;
            mu=mu*( (tmp>=CNST(ONE_THIRD))? tmp : CNST(ONE_THIRD) );
          }
          else
            mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */
#else

          mu=(mu>=pDp_eL2)? pDp_eL2 : mu; /* pDp_eL2 is the new pDp_eL2 */
#endif

          nu=2;

          for(i=0 ; i<m; ++i) /* update p's estimate */
            p[i]=pDp[i];

          for(i=0; i<n; ++i) /* update e and ||e||_2 */
            e[i]=hx[i];
          p_eL2=pDp_eL2;
          ++nLMsteps;
          gprevtaken=0;
          break;
        }
      }
      else{

      /* the augmented linear system could not be solved, increase mu */

        mu*=nu;
        nu2=nu<<1; // 2*nu;
        if(nu2<=nu){ /* nu has wrapped around (overflown). Thanks to Frank Jordan for spotting this case */
          stop=5;
          break;
        }
        nu=nu2;

        for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
          jacTjac[i*m+i]=diag_jacTjac[i];

        continue; /* solve again with increased nu */
      }

      /* if this point is reached, the LM step did not reduce the error;
       * see if it is a descent direction
       */

      /* negate jacTe (i.e. g) & compute g^T * Dp */
      for(i=0, jacTeDp=0.0; i<m; ++i){
        jacTe[i]=-jacTe[i];
        jacTeDp+=jacTe[i]*Dp[i];
      }

      if(jacTeDp<=-rho*pow(Dp_L2, _POW_/CNST(2.0))){
        /* Dp is a descent direction; do a line search along it */
        int mxtake, iretcd;
        LM_REAL stepmx;

        tmp=(LM_REAL)sqrt(p_L2); stepmx=CNST(1e3)*( (tmp>=CNST(1.0))? tmp : CNST(1.0) );

#if 1
        /* use Schnabel's backtracking line search; it requires fewer "func" evaluations */
        LNSRCH(m, p, p_eL2, jacTe, Dp, alpha, pDp, &pDp_eL2, func, fstate,
               &mxtake, &iretcd, stepmx, steptl, NULL); /* NOTE: LNSRCH() updates hx */
        if(iretcd!=0) goto gradproj; /* rather inelegant but effective way to handle LNSRCH() failures... */
#else
        /* use the simpler (but slower!) line search described by Kanzow */
        for(t=tini; t>tmin; t*=beta){
          for(i=0; i<m; ++i){
            pDp[i]=p[i] + t*Dp[i];
            //pDp[i]=__MEDIAN3(lb[i], pDp[i], ub[i]); /* project to feasible set */
          }

          (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p + t*Dp */
          for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
            hx[i]=tmp=x[i]-hx[i];
            pDp_eL2+=tmp*tmp;
          }
          //if(CNST(0.5)*pDp_eL2<=CNST(0.5)*p_eL2 + t*alpha*jacTeDp) break;
          if(pDp_eL2<=p_eL2 + CNST(2.0)*t*alpha*jacTeDp) break;
        }
#endif
        ++nLSsteps;
        gprevtaken=0;

        /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2.
         * These values are used below to update their corresponding variables 
         */
      }
      else{
gradproj: /* Note that this point can also be reached via a goto when LNSRCH() fails */

        /* jacTe is a descent direction; make a projected gradient step */

        /* if the previous step was along the gradient descent, try to use the t employed in that step */
        /* compute ||g|| */
        for(i=0, tmp=0.0; i<m; ++i)
          tmp=jacTe[i]*jacTe[i];
        tmp=(LM_REAL)sqrt(tmp);
        tmp=CNST(100.0)/(CNST(1.0)+tmp);
        t0=(tmp<=tini)? tmp : tini; /* guard against poor scaling & large steps; see (3.50) in C.T. Kelley's book */

        for(t=(gprevtaken)? t : t0; t>tming; t*=beta){
          for(i=0; i<m; ++i)
            pDp[i]=p[i] - t*jacTe[i];
          BOXPROJECT(pDp, lb, ub, m); /* project to feasible set */
          for(i=0; i<m; ++i)
            Dp[i]=pDp[i]-p[i];

          (*func)(pDp, hx, m, n, adata); ++nfev; /* evaluate function at p - t*g */
          for(i=0, pDp_eL2=0.0; i<n; ++i){ /* compute ||e(pDp)||_2 */
            hx[i]=tmp=x[i]-hx[i];
            pDp_eL2+=tmp*tmp;
          }
          for(i=0, tmp=0.0; i<m; ++i) /* compute ||g^T * Dp|| */
            tmp+=jacTe[i]*Dp[i];

          if(gprevtaken && pDp_eL2<=p_eL2 + CNST(2.0)*CNST(0.99999)*tmp){ /* starting t too small */
            t=t0;
            gprevtaken=0;
            continue;
          }
          //if(CNST(0.5)*pDp_eL2<=CNST(0.5)*p_eL2 + alpha*tmp) break;
          if(pDp_eL2<=p_eL2 + CNST(2.0)*alpha*tmp) break;
        }

        ++nPGsteps;
        gprevtaken=1;
        /* NOTE: new estimate for p is in pDp, associated error in hx and its norm in pDp_eL2 */
      }

      /* update using computed values */

      for(i=0, Dp_L2=0.0; i<m; ++i){
        tmp=pDp[i]-p[i];
        Dp_L2+=tmp*tmp;
      }
      //Dp_L2=sqrt(Dp_L2);

      if(Dp_L2<=eps2_sq*p_L2){ /* relative change in p is small, stop */
        stop=2;
        break;
      }

      for(i=0 ; i<m; ++i) /* update p's estimate */
        p[i]=pDp[i];

      for(i=0; i<n; ++i) /* update e and ||e||_2 */
        e[i]=hx[i];
      p_eL2=pDp_eL2;
      break;
    } /* inner loop */
  }

  if(k>=itmax) stop=3;

  for(i=0; i<m; ++i) /* restore diagonal J^T J entries */
    jacTjac[i*m+i]=diag_jacTjac[i];

  if(info){
    info[0]=init_p_eL2;
    info[1]=p_eL2;
    info[2]=jacTe_inf;
    info[3]=Dp_L2;
    for(i=0, tmp=LM_REAL_MIN; i<m; ++i)
      if(tmp<jacTjac[i*m+i]) tmp=jacTjac[i*m+i];
    info[4]=mu/tmp;
    info[5]=(LM_REAL)k;
    info[6]=(LM_REAL)stop;
    info[7]=(LM_REAL)nfev;
    info[8]=(LM_REAL)njev;
  }

  /* covariance matrix */
  if(covar){
    LEVMAR_COVAR(jacTjac, covar, p_eL2, m, n);
  }
                                                               
  if(freework) free(work);

#if 0
printf("%d LM steps, %d line search, %d projected gradient\n", nLMsteps, nLSsteps, nPGsteps);
#endif

  return (stop!=4)?  k : -1;
}
Esempio n. 6
0
/*
 * This function implements an elimination strategy for linearly constrained
 * optimization problems. The strategy relies on QR decomposition to transform
 * an optimization problem constrained by Ax=b to an equivalent, unconstrained
 * one. Also referred to as "null space" or "reduced Hessian" method.
 * See pp. 430-433 (chap. 15) of "Numerical Optimization" by Nocedal-Wright
 * for details.
 *
 * A is mxn with m<=n and rank(A)=m
 * Two matrices Y and Z of dimensions nxm and nx(n-m) are computed from A^T so that
 * their columns are orthonormal and every x can be written as x=Y*b + Z*x_z=
 * c + Z*x_z, where c=Y*b is a fixed vector of dimension n and x_z is an
 * arbitrary vector of dimension n-m. Then, the problem of minimizing f(x)
 * subject to Ax=b is equivalent to minimizing f(c + Z*x_z) with no constraints.
 * The computed Y and Z are such that any solution of Ax=b can be written as
 * x=Y*x_y + Z*x_z for some x_y, x_z. Furthermore, A*Y is nonsingular, A*Z=0
 * and Z spans the null space of A.
 *
 * The function accepts A, b and computes c, Y, Z. If b or c is NULL, c is not
 * computed. Also, Y can be NULL in which case it is not referenced.
 * The function returns 0 in case of error, A's computed rank if successfull
 *
 */
static int LMLEC_ELIM(LM_REAL *A, LM_REAL *b, LM_REAL *c, LM_REAL *Y, LM_REAL *Z, int m, int n)
{
static LM_REAL eps=CNST(-1.0);

LM_REAL *buf=NULL;
LM_REAL *a, *tau, *work, *r;
register LM_REAL tmp;
int a_sz, jpvt_sz, tau_sz, r_sz, Y_sz, worksz;
int info, rank, *jpvt, tot_sz, mintmn, tm, tn;
register int i, j, k;

  if(m>n){
    fprintf(stderr, RCAT("matrix of constraints cannot have more rows than columns in", LMLEC_ELIM) "()!\n");
    exit(1);
  }

  tm=n; tn=m; // transpose dimensions
  mintmn=m;

  /* calculate required memory size */
  a_sz=tm*tm; // tm*tn is enough for xgeqp3()
  jpvt_sz=tn;
  tau_sz=mintmn;
  r_sz=mintmn*mintmn; // actually smaller if a is not of full row rank
  worksz=2*tn+(tn+1)*32; // more than needed
  Y_sz=(Y)? 0 : tm*tn;

  tot_sz=jpvt_sz*sizeof(int) + (a_sz + tau_sz + r_sz + worksz + Y_sz)*sizeof(LM_REAL);
  buf=(LM_REAL *)malloc(tot_sz); /* allocate a "big" memory chunk at once */
  if(!buf){
    fprintf(stderr, RCAT("Memory allocation request failed in ", LMLEC_ELIM) "()\n");
    exit(1);
  }

  a=(LM_REAL *)buf;
  jpvt=(int *)(a+a_sz);
  tau=(LM_REAL *)(jpvt + jpvt_sz);
  r=tau+tau_sz;
  work=r+r_sz;
  if(!Y) Y=work+worksz;

  /* copy input array so that LAPACK won't destroy it. Note that copying is
   * done in row-major order, which equals A^T in column-major
   */
  for(i=0; i<tm*tn; ++i)
      a[i]=A[i];

  /* clear jpvt */
  for(i=0; i<jpvt_sz; ++i) jpvt[i]=0;

  /* rank revealing QR decomposition of A^T*/
  GEQP3((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, (int *)&worksz, &info);
  //dgeqpf_((int *)&tm, (int *)&tn, a, (int *)&tm, jpvt, tau, work, &info);
  /* error checking */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GEQP3) " in ", LMLEC_ELIM) "()\n", -info);
      exit(1);
    }
    else if(info>0){
      fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", GEQP3) " in ", LMLEC_ELIM) "()\n", info);
      free(buf);
      return 0;
    }
  }
  /* the upper triangular part of a now contains the upper triangle of the unpermuted R */

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon. DBL_EPSILON should do also */
    for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5))
                              ;
    eps*=CNST(2.0);
  }

  tmp=tm*CNST(10.0)*eps*FABS(a[0]); // threshold. tm is max(tm, tn)
  tmp=(tmp>CNST(1E-12))? tmp : CNST(1E-12); // ensure that threshold is not too small
  /* compute A^T's numerical rank by counting the non-zeros in R's diagonal */
  for(i=rank=0; i<mintmn; ++i)
    if(a[i*(tm+1)]>tmp || a[i*(tm+1)]<-tmp) ++rank; /* loop across R's diagonal elements */
    else break; /* diagonal is arranged in absolute decreasing order */

  if(rank<tn){
    fprintf(stderr, RCAT("\nConstraints matrix in ",  LMLEC_ELIM) "() is not of full row rank (i.e. %d < %d)!\n"
            "Make sure that you do not specify redundant or inconsistent constraints.\n\n", rank, tn);
    exit(1);
  }

  /* compute the permuted inverse transpose of R */
  /* first, copy R from the upper triangular part of a to r. R is rank x rank */
  for(j=0; j<rank; ++j){
    for(i=0; i<=j; ++i)
      r[i+j*rank]=a[i+j*tm];
    for(i=j+1; i<rank; ++i)
      r[i+j*rank]=0.0; // lower part is zero
  }

  /* compute the inverse */
  TRTRI("U", "N", (int *)&rank, r, (int *)&rank, &info);
  /* error checking */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", TRTRI) " in ", LMLEC_ELIM) "()\n", -info);
      exit(1);
    }
    else if(info>0){
      fprintf(stderr, RCAT(RCAT("A(%d, %d) is exactly zero for ", TRTRI) " (singular matrix) in ", LMLEC_ELIM) "()\n", info, info);
      free(buf);
      return 0;
    }
  }
  /* then, transpose r in place */
  for(i=0; i<rank; ++i)
    for(j=i+1; j<rank; ++j){
      tmp=r[i+j*rank];
      k=j+i*rank;
      r[i+j*rank]=r[k];
      r[k]=tmp;
  }

  /* finally, permute R^-T using Y as intermediate storage */
  for(j=0; j<rank; ++j)
    for(i=0, k=jpvt[j]-1; i<rank; ++i)
      Y[i+k*rank]=r[i+j*rank];

  for(i=0; i<rank*rank; ++i) // copy back to r
    r[i]=Y[i];

  /* resize a to be tm x tm, filling with zeroes */
  for(i=tm*tn; i<tm*tm; ++i)
    a[i]=0.0;

  /* compute Q in a as the product of elementary reflectors. Q is tm x tm */
  ORGQR((int *)&tm, (int *)&tm, (int *)&mintmn, a, (int *)&tm, tau, work, &worksz, &info);
  /* error checking */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT("LAPACK error: illegal value for argument %d of ", ORGQR) " in ", LMLEC_ELIM) "()\n", -info);
      exit(1);
    }
    else if(info>0){
      fprintf(stderr, RCAT(RCAT("unknown LAPACK error (%d) for ", ORGQR) " in ", LMLEC_ELIM) "()\n", info);
      free(buf);
      return 0;
    }
  }

  /* compute Y=Q_1*R^-T*P^T. Y is tm x rank */
  for(i=0; i<tm; ++i)
    for(j=0; j<rank; ++j){
      for(k=0, tmp=0.0; k<rank; ++k)
        tmp+=a[i+k*tm]*r[k+j*rank];
      Y[i*rank+j]=tmp;
    }

  if(b && c){
    /* compute c=Y*b */
    for(i=0; i<tm; ++i){
      for(j=0, tmp=0.0; j<rank; ++j)
        tmp+=Y[i*rank+j]*b[j];

      c[i]=tmp;
    }
  }

  /* copy Q_2 into Z. Z is tm x (tm-rank) */
  for(j=0; j<tm-rank; ++j)
    for(i=0, k=j+rank; i<tm; ++i)
      Z[i*(tm-rank)+j]=a[i+k*tm];

  free(buf);

  return rank;
}
Esempio n. 7
0
/* Similar to the LEVMAR_LEC_DER() function above, except that the jacobian is approximated
 * with the aid of finite differences (forward or central, see the comment for the opts argument)
 */
int LEVMAR_LEC_DIF(
  void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata), /* functional relation describing measurements. A p \in R^m yields a \hat{x} \in  R^n */
  LM_REAL *p,         /* I/O: initial parameter estimates. On output has the estimated solution */
  LM_REAL *x,         /* I: measurement vector */
  int m,              /* I: parameter vector dimension (i.e. #unknowns) */
  int n,              /* I: measurement vector dimension */
  LM_REAL *A,         /* I: constraints matrix, kxm */
  LM_REAL *b,         /* I: right hand constraints vector, kx1 */
  int k,              /* I: number of contraints (i.e. A's #rows) */
  int itmax,          /* I: maximum number of iterations */
  LM_REAL opts[5],    /* I: opts[0-3] = minim. options [\mu, \epsilon1, \epsilon2, \epsilon3, \delta]. Respectively the
                       * scale factor for initial \mu, stopping thresholds for ||J^T e||_inf, ||Dp||_2 and ||e||_2 and
                       * the step used in difference approximation to the jacobian. Set to NULL for defaults to be used.
                       * If \delta<0, the jacobian is approximated with central differences which are more accurate
                       * (but slower!) compared to the forward differences employed by default. 
                       */
  LM_REAL info[LM_INFO_SZ],
					           /* O: information regarding the minimization. Set to NULL if don't care
                      * info[0]= ||e||_2 at initial p.
                      * info[1-4]=[ ||e||_2, ||J^T e||_inf,  ||Dp||_2, mu/max[J^T J]_ii ], all computed at estimated p.
                      * info[5]= # iterations,
                      * info[6]=reason for terminating: 1 - stopped by small gradient J^T e
                      *                                 2 - stopped by small Dp
                      *                                 3 - stopped by itmax
                      *                                 4 - singular matrix. Restart from current p with increased mu 
                      *                                 5 - no further error reduction is possible. Restart with increased mu
                      *                                 6 - stopped by small ||e||_2
                      * info[7]= # function evaluations
                      * info[8]= # jacobian evaluations
                      */
  LM_REAL *work,     /* working memory, allocate if NULL */
  LM_REAL *covar,    /* O: Covariance matrix corresponding to LS solution; mxm. Set to NULL if not needed. */
  void *adata)       /* pointer to possibly additional data, passed uninterpreted to func.
                      * Set to NULL if not needed
                      */
{
  struct LMLEC_DATA data;
  LM_REAL *ptr, *Z, *pp, *p0, *Zimm; /* Z is mxmm */
  int mm, ret;
  register int i, j;
  register LM_REAL tmp;
  LM_REAL locinfo[LM_INFO_SZ];

  mm=m-k;

  ptr=(LM_REAL *)malloc((2*m + m*mm + mm)*sizeof(LM_REAL));
  if(!ptr){
    fprintf(stderr, LCAT(LEVMAR_LEC_DIF, "(): memory allocation request failed\n"));
    exit(1);
  }
  data.p=p;
  p0=ptr;
  data.c=p0+m;
  data.Z=Z=data.c+m;
  data.jac=NULL;
  pp=data.Z+m*mm;
  data.ncnstr=k;
  data.func=func;
  data.jacf=NULL;
  data.adata=adata;

  LMLEC_ELIM(A, b, data.c, NULL, Z, k, m); // compute c, Z

  /* compute pp s.t. p = c + Z*pp or (Z^T Z)*pp=Z^T*(p-c)
   * Due to orthogonality, Z^T Z = I and the last equation
   * becomes pp=Z^T*(p-c). Also, save the starting p in p0
   */
  for(i=0; i<m; ++i){
    p0[i]=p[i];
    p[i]-=data.c[i];
  }

  /* Z^T*(p-c) */
  for(i=0; i<mm; ++i){
    for(j=0, tmp=0.0; j<m; ++j)
      tmp+=Z[j*mm+i]*p[j];
    pp[i]=tmp;
  }

  /* compute the p corresponding to pp (i.e. c + Z*pp) and compare with p0 */
  for(i=0; i<m; ++i){
    Zimm=Z+i*mm;
    for(j=0, tmp=data.c[i]; j<mm; ++j)
      tmp+=Zimm[j]*pp[j]; // tmp+=Z[i*mm+j]*pp[j];
    if(FABS(tmp-p0[i])>CNST(1E-03))
      fprintf(stderr, RCAT("Warning: component %d of starting point not feasible in ", LEVMAR_LEC_DIF) "()! [%.10g reset to %.10g]\n",
                      i, p0[i], tmp);
  }

  if(!info) info=locinfo; /* make sure that LEVMAR_DIF() is called with non-null info */
  /* note that covariance computation is not requested from LEVMAR_DIF() */
  ret=LEVMAR_DIF(LMLEC_FUNC, pp, x, mm, n, itmax, opts, info, work, NULL, (void *)&data);

  /* p=c + Z*pp */
  for(i=0; i<m; ++i){
    Zimm=Z+i*mm;
    for(j=0, tmp=data.c[i]; j<mm; ++j)
      tmp+=Zimm[j]*pp[j]; // tmp+=Z[i*mm+j]*pp[j];
    p[i]=tmp;
  }

  /* compute the jacobian with finite differences and use it to estimate the covariance */
  if(covar){
    LM_REAL *hx, *wrk, *jac;

    hx=(LM_REAL *)malloc((2*n+n*m)*sizeof(LM_REAL));
    if(!work){
      fprintf(stderr, LCAT(LEVMAR_LEC_DIF, "(): memory allocation request failed\n"));
      exit(1);
    }

    wrk=hx+n;
    jac=wrk+n;

    (*func)(p, hx, m, n, adata); /* evaluate function at p */
    FDIF_FORW_JAC_APPROX(func, p, hx, wrk, (LM_REAL)LM_DIFF_DELTA, jac, m, n, adata); /* compute the jacobian at p */
    TRANS_MAT_MAT_MULT(jac, covar, n, m, __BLOCKSZ__); /* covar = J^T J */
    LEVMAR_COVAR(covar, covar, info[1], m, n);
    free(hx);
  }

  free(ptr);

  return ret;
}
Esempio n. 8
0
/*
 * This function returns the solution of Ax = b
 *
 * The function employs LU decomposition followed by forward/back substitution (see 
 * also the LAPACK-based LU solver above)
 *
 * A is mxm, b is mx1
 *
 * The function returns 0 in case of error,
 * 1 if successfull
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_LU(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ void *buf=NULL;
__STATIC__ int buf_sz=0;

register int i, j, k;
int *idx, maxi=-1, idx_sz, a_sz, work_sz, tot_sz;
LM_REAL *a, *work, max, sum, tmp;

#ifdef LINSOLVERS_RETAIN_MEMORY
    if(!A){
      if(buf) free(buf);
      buf_sz=0;
      return 1;
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
  /* calculate required memory size */
  idx_sz=m;
  a_sz=m*m;
  work_sz=m;
  tot_sz=idx_sz*sizeof(int) + (a_sz+work_sz)*sizeof(LM_REAL);

#ifdef LINSOLVERS_RETAIN_MEMORY
  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
    if(buf) free(buf); /* free previously allocated memory */

    buf_sz=tot_sz;
    buf=(void *)malloc(tot_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
      exit(1);
    }
  }
#else
    buf_sz=tot_sz;
    buf=(void *)malloc(tot_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_LU) "() failed!\n");
      exit(1);
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */

  idx=(int *)buf;
  a=(LM_REAL *)(idx + idx_sz);
  work=a + a_sz;

  /* avoid destroying A, B by copying them to a, x resp. */
  for(i=0; i<m; ++i){ // B & 1st row of A
    a[i]=A[i];
    x[i]=B[i];
  }
  for(  ; i<a_sz; ++i) a[i]=A[i]; // copy A's remaining rows
  /****
  for(i=0; i<m; ++i){
    for(j=0; j<m; ++j)
      a[i*m+j]=A[i*m+j];
    x[i]=B[i];
  }
  ****/

  /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
	for(i=0; i<m; ++i){
		max=0.0;
		for(j=0; j<m; ++j)
			if((tmp=FABS(a[i*m+j]))>max)
        max=tmp;
		  if(max==0.0){
        fprintf(stderr, RCAT("Singular matrix A in ", AX_EQ_B_LU) "()!\n");
#ifndef LINSOLVERS_RETAIN_MEMORY
        free(buf);
#endif

        return 0;
      }
		  work[i]=CNST(1.0)/max;
	}

	for(j=0; j<m; ++j){
		for(i=0; i<j; ++i){
			sum=a[i*m+j];
			for(k=0; k<i; ++k)
        sum-=a[i*m+k]*a[k*m+j];
			a[i*m+j]=sum;
		}
		max=0.0;
		for(i=j; i<m; ++i){
			sum=a[i*m+j];
			for(k=0; k<j; ++k)
        sum-=a[i*m+k]*a[k*m+j];
			a[i*m+j]=sum;
			if((tmp=work[i]*FABS(sum))>=max){
				max=tmp;
				maxi=i;
			}
		}
		if(j!=maxi){
			for(k=0; k<m; ++k){
				tmp=a[maxi*m+k];
				a[maxi*m+k]=a[j*m+k];
				a[j*m+k]=tmp;
			}
			work[maxi]=work[j];
		}
		idx[j]=maxi;
		if(a[j*m+j]==0.0)
      a[j*m+j]=LM_REAL_EPSILON;
		if(j!=m-1){
			tmp=CNST(1.0)/(a[j*m+j]);
			for(i=j+1; i<m; ++i)
        a[i*m+j]*=tmp;
		}
	}

  /* The decomposition has now replaced a. Solve the linear system using
   * forward and back substitution
   */
	for(i=k=0; i<m; ++i){
		j=idx[i];
		sum=x[j];
		x[j]=x[i];
		if(k!=0)
			for(j=k-1; j<i; ++j)
        sum-=a[i*m+j]*x[j];
		else
      if(sum!=0.0)
			  k=i+1;
		x[i]=sum;
	}

	for(i=m-1; i>=0; --i){
		sum=x[i];
		for(j=i+1; j<m; ++j)
      sum-=a[i*m+j]*x[j];
		x[i]=sum/a[i*m+i];
	}

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

  return 1;
}
Esempio n. 9
0
/*
 * This function returns the solution of Ax = b
 *
 * The function is based on SVD decomposition:
 * If A=U D V^T with U, V orthogonal and D diagonal, the linear system becomes
 * (U D V^T) x = b or x=V D^{-1} U^T b
 * Note that V D^{-1} U^T is the pseudoinverse A^+
 *
 * A is mxm, b is mx1.
 *
 * The function returns 0 in case of error, 1 if successfull
 *
 * This function is often called repetitively to solve problems of identical
 * dimensions. To avoid repetitive malloc's and free's, allocated memory is
 * retained between calls and free'd-malloc'ed when not of the appropriate size.
 * A call with NULL as the first argument forces this memory to be released.
 */
int AX_EQ_B_SVD(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
{
__STATIC__ LM_REAL *buf=NULL;
__STATIC__ int buf_sz=0;
static LM_REAL eps=CNST(-1.0);

register int i, j;
LM_REAL *a, *u, *s, *vt, *work;
int a_sz, u_sz, s_sz, vt_sz, tot_sz;
LM_REAL thresh, one_over_denom;
register LM_REAL sum;
int info, rank, worksz, *iwork, iworksz;
   
#ifdef LINSOLVERS_RETAIN_MEMORY
    if(!A){
      if(buf) free(buf);
      buf_sz=0;
      return 1;
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */
   
  /* calculate required memory size */
  worksz=16*m; /* more than needed */
  iworksz=8*m;
  a_sz=m*m;
  u_sz=m*m; s_sz=m; vt_sz=m*m;

  tot_sz=iworksz*sizeof(int) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL);

#ifdef LINSOLVERS_RETAIN_MEMORY
  if(tot_sz>buf_sz){ /* insufficient memory, allocate a "big" memory chunk at once */
    if(buf) free(buf); /* free previously allocated memory */

    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
  }
#else
    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", AX_EQ_B_SVD) "() failed!\n");
      exit(1);
    }
#endif /* LINSOLVERS_RETAIN_MEMORY */

  iwork=(int *)buf;
  a=(LM_REAL *)(iwork+iworksz);
  /* store A (column major!) into a */
  for(i=0; i<m; i++)
    for(j=0; j<m; j++)
      a[i+j*m]=A[i*m+j];

  u=a + a_sz;
  s=u+u_sz;
  vt=s+s_sz;
  work=vt+vt_sz;

  /* SVD decomposition of A */
  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);

  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", AX_EQ_B_SVD) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", AX_EQ_B_SVD) "() [info=%d]\n", info);
#ifndef LINSOLVERS_RETAIN_MEMORY
      free(buf);
#endif

      return 0;
    }
  }

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon */
    for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5))
                                          ;
    eps*=CNST(2.0);
  }

  /* compute the pseudoinverse in a */
	for(i=0; i<a_sz; i++) a[i]=0.0; /* initialize to zero */
  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
    one_over_denom=CNST(1.0)/s[rank];

    for(j=0; j<m; j++)
      for(i=0; i<m; i++)
        a[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
  }

	/* compute A^+ b in x */
	for(i=0; i<m; i++){
	  for(j=0, sum=0.0; j<m; j++)
      sum+=a[i*m+j]*B[j];
    x[i]=sum;
  }

#ifndef LINSOLVERS_RETAIN_MEMORY
  free(buf);
#endif

	return 1;
}
Esempio n. 10
0
       {{ b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16 }}
#else
#define D(b1,b2,b3,b4,b5,b6,b7,b8,b9,b10,b11,b12,b13,b14,b15,b16) \
       {{ b16,b15,b14,b13,b12,b11,b10,b9,b8,b7,b6,b5,b4,b3,b2,b1 }}
#endif

/* Table of user visible constants
 */
const struct cnsts cnsts[] = {
#if ! defined(REALBUILD) || defined(COMPILE_CATALOGUES)
#define CNST(n, index, fn, alias) { index, fn, alias },
#else
#define CNST(n, index, fn, alias) { index, fn },
#endif

	CNST(OP_HALF, 0x00, "1/2", "HALF")
	CNST(OP_PC_a, 0x01, "a", CNULL)
	CNST(OP_PC_a0, 0x02, "a\270", "a0")
	CNST(OP_PC_SM_luna, 0x03, "a\033", "SM_luna")
	CNST(OP_PC_SM_terra, 0x04, "a\256", "SM_terra")
	CNST(OP_PC_C, 0x05, "c", "C")
	CNST(OP_PC_C1, 0x06, "c\271", "C1")
	CNST(OP_PC_C2, 0x07, "c\272", "C2")
	CNST(OP_PC_eV, 0x08, "e", "eV")
	CNST(OP_CNSTE, 0x82, "eE", "CNSTE")
	CNST(OP_PC_F, 0x09, "F", CNULL)
	CNST(OP_PC_F_alpha, 0x83, "F\240", "F_alpha")
	CNST(OP_PC_F_delta, 0x84, "F\243", "F_delta")
	CNST(OP_PC_g, 0x0a, "g", CNULL)
	CNST(OP_PC_G, 0x0b, "G", CNULL)
	CNST(OP_PC_Go, 0x0c, "G\270", "Go")
Esempio n. 11
0
/*
 * This function computes the inverse of A in B. A and B can coincide
 *
 * The function employs LAPACK-free LU decomposition of A to solve m linear
 * systems A*B_i=I_i, where B_i and I_i are the i-th columns of B and I.
 *
 * A and B are mxm
 *
 * The function returns 0 in case of error,
 * 1 if successfull
 *
 */
static int LEVMAR_LUINVERSE(LM_REAL *A, LM_REAL *B, int m)
{
void *buf=NULL;
int buf_sz=0;

register int i, j, k, l;
int *idx, maxi=-1, idx_sz, a_sz, x_sz, work_sz, tot_sz;
LM_REAL *a, *x, *work, max, sum, tmp;

  /* calculate required memory size */
  idx_sz=m;
  a_sz=m*m;
  x_sz=m;
  work_sz=m;
  tot_sz=idx_sz*sizeof(int) + (a_sz+x_sz+work_sz)*sizeof(LM_REAL);

  buf_sz=tot_sz;
  buf=(void *)malloc(tot_sz);
  if(!buf){
    fprintf(stderr, RCAT("memory allocation in ", LEVMAR_LUINVERSE) "() failed!\n");
    exit(1);
  }

  idx=(int *)buf;
  a=(LM_REAL *)(idx + idx_sz);
  x=a + a_sz;
  work=x + x_sz;

  /* avoid destroying A by copying it to a */
  for(i=0; i<a_sz; ++i) a[i]=A[i];

  /* compute the LU decomposition of a row permutation of matrix a; the permutation itself is saved in idx[] */
	for(i=0; i<m; ++i){
		max=0.0;
		for(j=0; j<m; ++j)
			if((tmp=FABS(a[i*m+j]))>max)
        max=tmp;
		  if(max==0.0){
        fprintf(stderr, RCAT("Singular matrix A in ", LEVMAR_LUINVERSE) "()!\n");
        free(buf);

        return 0;
      }
		  work[i]=CNST(1.0)/max;
	}

	for(j=0; j<m; ++j){
		for(i=0; i<j; ++i){
			sum=a[i*m+j];
			for(k=0; k<i; ++k)
        sum-=a[i*m+k]*a[k*m+j];
			a[i*m+j]=sum;
		}
		max=0.0;
		for(i=j; i<m; ++i){
			sum=a[i*m+j];
			for(k=0; k<j; ++k)
        sum-=a[i*m+k]*a[k*m+j];
			a[i*m+j]=sum;
			if((tmp=work[i]*FABS(sum))>=max){
				max=tmp;
				maxi=i;
			}
		}
		if(j!=maxi){
			for(k=0; k<m; ++k){
				tmp=a[maxi*m+k];
				a[maxi*m+k]=a[j*m+k];
				a[j*m+k]=tmp;
			}
			work[maxi]=work[j];
		}
		idx[j]=maxi;
		if(a[j*m+j]==0.0)
      a[j*m+j]=LM_REAL_EPSILON;
		if(j!=m-1){
			tmp=CNST(1.0)/(a[j*m+j]);
			for(i=j+1; i<m; ++i)
        a[i*m+j]*=tmp;
		}
	}

  /* The decomposition has now replaced a. Solve the m linear systems using
   * forward and back substitution
   */
  for(l=0; l<m; ++l){
    for(i=0; i<m; ++i) x[i]=0.0;
    x[l]=CNST(1.0);

	  for(i=k=0; i<m; ++i){
		  j=idx[i];
		  sum=x[j];
		  x[j]=x[i];
		  if(k!=0)
			  for(j=k-1; j<i; ++j)
          sum-=a[i*m+j]*x[j];
		  else
        if(sum!=0.0)
			    k=i+1;
		  x[i]=sum;
	  }

	  for(i=m-1; i>=0; --i){
		  sum=x[i];
		  for(j=i+1; j<m; ++j)
        sum-=a[i*m+j]*x[j];
		  x[i]=sum/a[i*m+i];
	  }

    for(i=0; i<m; ++i)
      B[i*m+l]=x[i];
  }

  free(buf);

  return 1;
}
Esempio n. 12
0
/*
 * This function computes the pseudoinverse of a square matrix A
 * into B using SVD. A and B can coincide
 * 
 * The function returns 0 in case of error (e.g. A is singular),
 * the rank of A if successfull
 *
 * A, B are mxm
 *
 */
static int LEVMAR_PSEUDOINVERSE(LM_REAL *A, LM_REAL *B, int m)
{
LM_REAL *buf=NULL;
int buf_sz=0;
static LM_REAL eps=CNST(-1.0);

register int i, j;
LM_REAL *a, *u, *s, *vt, *work;
int a_sz, u_sz, s_sz, vt_sz, tot_sz;
LM_REAL thresh, one_over_denom;
int info, rank, worksz, *iwork, iworksz;
   
  /* calculate required memory size */
  worksz=16*m; /* more than needed */
  iworksz=8*m;
  a_sz=m*m;
  u_sz=m*m; s_sz=m; vt_sz=m*m;

  tot_sz=iworksz*sizeof(int) + (a_sz + u_sz + s_sz + vt_sz + worksz)*sizeof(LM_REAL);

    buf_sz=tot_sz;
    buf=(LM_REAL *)malloc(buf_sz);
    if(!buf){
      fprintf(stderr, RCAT("memory allocation in ", LEVMAR_PSEUDOINVERSE) "() failed!\n");
      exit(1);
    }

  iwork=(int *)buf;
  a=(LM_REAL *)(iwork+iworksz);
  /* store A (column major!) into a */
  for(i=0; i<m; i++)
    for(j=0; j<m; j++)
      a[i+j*m]=A[i*m+j];

  u=a + a_sz;
  s=u+u_sz;
  vt=s+s_sz;
  work=vt+vt_sz;

  /* SVD decomposition of A */
  GESVD("A", "A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, &info);
  //GESDD("A", (int *)&m, (int *)&m, a, (int *)&m, s, u, (int *)&m, vt, (int *)&m, work, (int *)&worksz, iwork, &info);

  /* error treatment */
  if(info!=0){
    if(info<0){
      fprintf(stderr, RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ", GESVD), "/" GESDD) " in ", LEVMAR_PSEUDOINVERSE) "()\n", -info);
      exit(1);
    }
    else{
      fprintf(stderr, RCAT("LAPACK error: dgesdd (dbdsdc)/dgesvd (dbdsqr) failed to converge in ", LEVMAR_PSEUDOINVERSE) "() [info=%d]\n", info);
      free(buf);

      return 0;
    }
  }

  if(eps<0.0){
    LM_REAL aux;

    /* compute machine epsilon */
    for(eps=CNST(1.0); aux=eps+CNST(1.0), aux-CNST(1.0)>0.0; eps*=CNST(0.5))
                                          ;
    eps*=CNST(2.0);
  }

  /* compute the pseudoinverse in B */
	for(i=0; i<a_sz; i++) B[i]=0.0; /* initialize to zero */
  for(rank=0, thresh=eps*s[0]; rank<m && s[rank]>thresh; rank++){
    one_over_denom=CNST(1.0)/s[rank];

    for(j=0; j<m; j++)
      for(i=0; i<m; i++)
        B[i*m+j]+=vt[rank+i*m]*u[j+rank*m]*one_over_denom;
  }

  free(buf);

	return rank;
}
Esempio n. 13
0
void LEVMAR_CHKJAC(
    void (*func)(LM_REAL *p, LM_REAL *hx, int m, int n, void *adata),
    void (*jacf)(LM_REAL *p, LM_REAL *j, int m, int n, void *adata),
    LM_REAL *p, int m, int n, void *adata, LM_REAL *err)
{
LM_REAL factor=CNST(100.0);
LM_REAL one=CNST(1.0);
LM_REAL zero=CNST(0.0);
LM_REAL *fvec, *fjac, *pp, *fvecp, *buf;

register int i, j;
LM_REAL eps, epsf, temp, epsmch;
LM_REAL epslog;
int fvec_sz=n, fjac_sz=n*m, pp_sz=m, fvecp_sz=n;

  epsmch=LM_REAL_EPSILON;
  eps=(LM_REAL)sqrt(epsmch);

  buf=(LM_REAL *)malloc((fvec_sz + fjac_sz + pp_sz + fvecp_sz)*sizeof(LM_REAL));
  if(!buf){
    fprintf(stderr, LCAT(LEVMAR_CHKJAC, "(): memory allocation request failed\n"));
    exit(1);
  }
  fvec=buf;
  fjac=fvec+fvec_sz;
  pp=fjac+fjac_sz;
  fvecp=pp+pp_sz;

  /* compute fvec=func(p) */
  (*func)(p, fvec, m, n, adata);

  /* compute the jacobian at p */
  (*jacf)(p, fjac, m, n, adata);

  /* compute pp */
  for(j=0; j<m; ++j){
    temp=eps*FABS(p[j]);
    if(temp==zero) temp=eps;
    pp[j]=p[j]+temp;
  }

  /* compute fvecp=func(pp) */
  (*func)(pp, fvecp, m, n, adata);

  epsf=factor*epsmch;
  epslog=(LM_REAL)log10(eps);

  for(i=0; i<n; ++i)
    err[i]=zero;

  for(j=0; j<m; ++j){
    temp=FABS(p[j]);
    if(temp==zero) temp=one;

    for(i=0; i<n; ++i)
      err[i]+=temp*fjac[i*m+j];
  }

  for(i=0; i<n; ++i){
    temp=one;
    if(fvec[i]!=zero && fvecp[i]!=zero && FABS(fvecp[i]-fvec[i])>=epsf*FABS(fvec[i]))
        temp=eps*FABS((fvecp[i]-fvec[i])/eps - err[i])/(FABS(fvec[i])+FABS(fvecp[i]));
    err[i]=one;
    if(temp>epsmch && temp<eps)
        err[i]=((LM_REAL)log10(temp) - epslog)/epslog;
    if(temp>=eps) err[i]=zero;
  }

  free(buf);

  return;
}