示例#1
0
/* compute the norm of a vector in a manner that avoids overflows
 */
static LM_REAL VECNORM(LM_REAL *x, int n)
{
#ifdef HAVE_LAPACK
#define NRM2 LM_MK_BLAS_NAME(nrm2)
extern LM_REAL NRM2(int *n, LM_REAL *dx, int *incx);
int one=1;

  return NRM2(&n, x, &one);
#undef NRM2
#else // no LAPACK, use the simple method described by Blue in TOMS78
register int i;
LM_REAL max, sum, tmp;

  for(i=n, max=0.0; i-->0; )
    if(x[i]>max) max=x[i];
    else if(x[i]<-max) max=-x[i];

  for(i=n, sum=0.0; i-->0; ){
    tmp=x[i]/max;
    sum+=tmp*tmp;
  }

  return max*(LM_REAL)sqrt(sum);
#endif /* HAVE_LAPACK */
}
//=========================================================================
double  Epetra_SerialDenseVector::Norm2() const {

  // 2-norm of vector

  double result = NRM2(Length(), Values());

  UpdateFlops(2*Length());

  return(result);
}
void minimize_dual(DOUBLE *Xopt, DOUBLE *Xorig, INT length, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \
					DOUBLE c, INT N, INT K) {

	DOUBLE INTERV = 0.1;
	DOUBLE EXT = 3.0;   
	INT MAX = 20;       
	DOUBLE RATIO = (DOUBLE) 10;  
	DOUBLE SIG = 0.1; 
	DOUBLE RHO = SIG / (DOUBLE) 2;
	INT MN = K * 1;
	
	CHAR lamch_opt = 'U';
	DOUBLE realmin = LAMCH(&lamch_opt);

	DOUBLE red = 1;

	INT i = 0;
	INT ls_failed = 0;
	DOUBLE f0;
	DOUBLE *df0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE *dftemp = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE *df3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE *s = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE d0;
	INT derivFlag = 1;

	DOUBLE *X = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	datacpy(X, Xorig, MN);
	
	INT maxNK = IMAX(N, K);
	DOUBLE *SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE));
	DOUBLE *tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE));
	
	dual_obj_grad(&f0, df0, X, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix);
	
	INT incx = 1;
	INT incy = 1;
		
	datacpy(s, df0, MN);
	DOUBLE alpha = -1;
	SCAL(&MN, &alpha, s, &incx);
	
	d0 = - DOT(&MN, s, &incx, s, &incy);
	
	DOUBLE x1;
	DOUBLE x2;
	DOUBLE x3;
	DOUBLE x4;
	DOUBLE *X0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE *X3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	DOUBLE F0;
	DOUBLE *dF0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE));
	INT Mmin;
	DOUBLE f1;
	DOUBLE f2;
	DOUBLE f3;
	DOUBLE f4;
	DOUBLE d1;
	DOUBLE d2;
	DOUBLE d3;
	DOUBLE d4;
	INT success;
	DOUBLE A;
	DOUBLE B;
	DOUBLE sqrtquantity;
	DOUBLE tempnorm;
	DOUBLE tempinprod1;
	DOUBLE tempinprod2;
	DOUBLE tempscalefactor;
	
	x3 = red / (1 - d0);            

	while (i++ < length) {
		datacpy(X0, X, MN);
		datacpy(dF0, df0, MN);
		F0 = f0;
		Mmin = MAX;
		
		while (1) {
			x2 = 0;
			f2 = f0;
			d2 = d0;
			f3 = f0;
			
			datacpy(df3, df0, MN);
			
			success = 0;
			while ((!success) && (Mmin > 0)) {
				Mmin = Mmin - 1;
				
				datacpy(X3, X, MN);
				alpha = x3;
				AXPY(&MN, &alpha, s, &incx, X3, &incy);
				
				dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix);	

				if (ISNAN(f3) || ISINF(f3)) {  /* any(isnan(df3)+isinf(df3)) */
					x3 = (x2 + x3) * 0.5;
				} else {
					success = 1;
				}
			}
			
			if (f3 < F0) {
				datacpy(X0, X, MN);
				alpha = x3;
				AXPY(&MN, &alpha, s, &incx, X0, &incy);
				datacpy(dF0, df3, MN);
				F0 = f3;
			}	
			
			d3 = DOT(&MN, df3, &incx, s, &incy);

			if ((d3 > SIG * d0) || (f3 > f0 + x3 * RHO * d0) || (Mmin == 0)) {
				break;
			}
			
			x1 = x2; 
			f1 = f2; 
			d1 = d2;
			x2 = x3; 
			f2 = f3; 
			d2 = d3;
			A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1);
			B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1);
			sqrtquantity = B * B - A * d1 * (x2 - x1);

			if (sqrtquantity < 0) {
				x3 = x2 * EXT;
			} else {
				x3 = x1 - d1 * SQR(x2 - x1) / (B + SQRT(sqrtquantity));
				if (ISNAN(x3) || ISINF(x3) || (x3 < 0)) {
					x3 = x2 * EXT;
				} else if (x3 > x2 * EXT) {
					x3 = x2 * EXT;
				} else if (x3 < x2 + INTERV * (x2 - x1)) {
					x3 = x2 + INTERV * (x2 - x1);
				}
			}		
		}                
	
		while (((ABS(d3) > - SIG * d0) || (f3 > f0 + x3 * RHO * d0)) && (Mmin > 0)) {
			if ((d3 > 0) || (f3 > f0 + x3 * RHO * d0)) {
				x4 = x3;
				f4 = f3;
				d4 = d3;
			} else {
				x2 = x3;
				f2 = f3;
				d2 = d3;
			}

			if (f4 > f0) {
				x3 = x2 - (0.5 * d2 * SQR(x4 - x2)) / (f4 - f2 - d2 * (x4 - x2));
			} else {
				A = 6 * (f2 - f4) / (x4 - x2) + 3 * (d4 + d2);
				B = 3 * (f4 - f2) - (2 * d2 + d4) * (x4 - x2);
				x3 = x2 + (SQRT(B * B - A * d2 * SQR(x4 - x2)) - B) / A;
			}

			if (ISNAN(x3) || ISINF(x3)) {
				x3 = (x2 + x4) * 0.5;
			}
			x3 = IMAX(IMIN(x3, x4 - INTERV * (x4 - x2)), x2 + INTERV * (x4 - x2));

			datacpy(X3, X, MN);
			alpha = x3;
			AXPY(&MN, &alpha, s, &incx, X3, &incy);			

			dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix);

			if (f3 < F0) {
				datacpy(X0, X, MN);
				alpha = x3;
				AXPY(&MN, &alpha, s, &incx, X0, &incy);
				datacpy(dF0, df3, MN);
				F0 = f3;
			}

			Mmin = Mmin - 1;
			d3 = DOT(&MN, df3, &incx, s, &incy);
			
		}
		
		if ((ABS(d3) < - SIG * d0) && (f3 < f0 + x3 * RHO * d0)) {
			alpha = x3;
			AXPY(&MN, &alpha, s, &incx, X, &incy);
			f0 = f3;
			datacpy(dftemp, df3, MN);
			alpha = -1;
			AXPY(&MN, &alpha, df0, &incx, dftemp, &incy);
			tempinprod1 = DOT(&MN, dftemp, &incx, df3, &incy);
			tempnorm = NRM2(&MN, df0, &incx);
			tempinprod2 = SQR(tempnorm);
			tempscalefactor = tempinprod1 / tempinprod2;

			alpha = tempscalefactor;
			SCAL(&MN, &alpha, s, &incx);
			alpha = -1;
			AXPY(&MN, &alpha, df3, &incx, s, &incy);
			datacpy(df0, df3, MN);
			d3 = d0;
			d0 = DOT(&MN, df0, &incx, s, &incy);

			if (d0 > 0) {
				datacpy(s, df0, MN);
				alpha = -1;
				SCAL(&MN, &alpha, s, &incx);
				tempnorm = NRM2(&MN, s, &incx);
				d0 = - SQR(tempnorm);
			}
			x3 = x3 * IMIN(RATIO, d3 / (d0 - realmin));
			ls_failed = 0;
		} else {
			datacpy(X, X0, MN);
			datacpy(df0, dF0, MN);
			f0 = F0;
			
			if ((ls_failed == 1) || (i > length)) {
				break;
			}
			
			datacpy(s, df0, MN);
			alpha = -1;
			SCAL(&MN, &alpha, s, &incx);
			tempnorm = NRM2(&MN, s, &incx);
			d0 = - SQR(tempnorm);
			x3 = 1 / (1 - d0);
			
			ls_failed = 1;
		}
	}

	datacpy(Xopt, X, MN);
	
	FREE(SStLambda);
	FREE(tempMatrix);
	FREE(df0);
	FREE(dftemp);
	FREE(df3);
	FREE(s);
	FREE(X);
	FREE(X0);
	FREE(X3);
	FREE(dF0);
}
void dual_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *dualLambda, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \
					DOUBLE c, INT N, INT K, INT derivFlag, DOUBLE *SStLambda, DOUBLE *tempMatrix) {
	
	INT maxNK = IMAX(N, K);
	INT SStLambdaFlag = 0;
	if (SStLambda == NULL) { 
		SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE));
		SStLambdaFlag = 1;
	}

	INT tempMatrixFlag = 0;
	if (tempMatrix == NULL) { 
		tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE));
		tempMatrixFlag = 1;
	}
	
	datacpy(SStLambda, SSt, K * K);
	
	INT iterK;
	
/*
	#pragma omp parallel for private(iterK) shared(SStLambda, dualLambda, K)
*/
	for (iterK = 0; iterK < K; ++iterK) {
		SStLambda[iterK * K + iterK] += dualLambda[iterK];
	}
	
	CHAR uplo = 'U';
	INT POTRSN = K;
	INT POTRSLDA = K;
	INT INFO;
	
	POTRF(&uplo, &POTRSN, SStLambda, &POTRSLDA, &INFO);
	
	datacpy(tempMatrix, SXtXSt, K * K);
	
	INT POTRSNRHS = K;
	INT POTRSLDB = K;
	
	POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO);

	DOUBLE objTemp = 0;
	
/*
	#pragma omp parallel for private(iterK) shared(tempMatrix, K) reduction(-: objTemp)
*/
	for (iterK = 0; iterK < K; ++iterK) {
		objTemp = objTemp - tempMatrix[iterK * K + iterK];
	}
	
	INT ASUMN = K;
	INT incx = 1;
	DOUBLE sumDualLambda = ASUM(&ASUMN, dualLambda, &incx);
	
	objTemp += trXXt - c * sumDualLambda;
	*obj = - objTemp;

	if (derivFlag == 1) {
		
		datacpy(tempMatrix, SXt, K * N);
		
		POTRSNRHS = N;
		POTRSLDB = K;
	
		POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO);
		
		transpose(tempMatrix, SStLambda, K, N);
		
		INT NRM2N = N;
		DOUBLE tempNorm;
		#pragma omp parallel for private(iterK, tempNorm) shared(SStLambda, deriv, K, c)
		for (iterK = 0; iterK < K; ++iterK) {
			tempNorm = NRM2(&NRM2N, &SStLambda[iterK * N], &incx);
			deriv[iterK] = - SQR(tempNorm) + c;
		}
	}
	
	if (SStLambdaFlag == 1) {
		FREE(SStLambda);
	}
	
	if (tempMatrixFlag == 1) {
		FREE(tempMatrix);
	}
}
示例#5
0
文件: lcg.c 项目: advanpix/OptimPack
/*
 *  ... RHO[0] = RHO_PREV
 *  ... RHO[1] = RHO
 *  ... RHO[2] = ALPHA
 *  ... RHO[3] = BETA
 *  ... RHO[4] = |X|
 */
void
OPK_TRCG(const opk_index_t n, real_t p[], const real_t q[], real_t r[],
         real_t x[], const real_t z[], const real_t delta,
         real_t rho[5], opk_cg_state_t *state)
{
  const real_t zero = OPK_REALCONST(0.0);
  const real_t one  = OPK_REALCONST(1.0);
  real_t a, b, c, d, e, xn, sum, tmp;
  real_t pq, alpha, beta;
  long i;

  if (delta <= zero) {
    *state = OPK_CG_ERROR;
    return;
  }

  switch (*state) {

  case OPK_CG_START:

    /* Start with no initial guess: fill x with zeros,
       there is no needs to compute q = A.x0 since it is 0. */
    ZERO(n, x);
    rho[0] = rho[1] = rho[2] = rho[3] = rho[4] = zero;
    *state = OPK_CG_NEWX;
    return;

  case OPK_CG_RESTART:

    /* Start or restart with initial x given: copy initial x into p and
       request caller to compte: q = A.p */
    rho[0] = rho[1] = rho[2] = rho[3] = zero;
    xn = NRM2(n, x);
    if (xn >= delta) {
      if (xn > delta) {
        SCAL(n, delta/xn, x);
      }
      rho[4] = delta; /* |X| */
      *state = OPK_CG_TRUNCATED;
    } else {
      rho[4] = xn; /* |X| */
      COPY(n, x, p);
      *state = OPK_CG_AP;
    }
    return;

  case OPK_CG_NEWX:

    if (z == NULL) {
      /* No preconditioning.  Take z = r and jump to next case to compute
         conjugate gradient direction. */
      z = r;
    } else {
      /* Preconditioned version of the algorithm.  Request caller to compute
         preconditioned residuals. */
      *state = OPK_CG_PRECOND;
      return;
    }

  case OPK_CG_PRECOND:

    /* Caller has been requested to compute preconditioned residuals.  Use
       conjugate gradients recurrence to compute next search direption p. */
    rho[1] = DOT(n, r, z);
    if (rho[1] <= zero) {
      /* If r'.z too small, then algorithm has converged
         or preconditioner is not positive definite. */
      *state = (rho[1] < zero ? OPK_CG_NON_CONVEX : OPK_CG_FINISH);
      return;
    }
    if (rho[0] > zero) {
      beta = rho[1]/rho[0];
      for (i = 0; i < n; ++i) {
        p[i] = z[i] + beta*p[i];
      }
    } else {
      beta = zero;
      COPY(n, z, p);
    }
    rho[3] = beta;

    /* Request caller to compute: q = A.p */
    *state = OPK_CG_AP;
    return;

  case OPK_CG_AP:

    if (rho[1] > zero) {
      /* Caller has been requested to compute q = A.p.  Compute optimal step
         size and update the variables and the residuals. */
      pq = DOT(n, p, q);
      if (pq > zero) {
        alpha = rho[1]/pq; /* optimal step size */
        rho[2] = alpha; /* memorize optimal step size */
        if (alpha == zero) {
          /* If alpha too small, then algorithm has converged. */
          *state = OPK_CG_FINISH;
          return;
        }
        sum = zero;
        for (i = 0; i < n; ++i) {
          tmp = x[i] + alpha*p[i];
          sum += tmp*tmp;
        }
        xn = SQRT(sum);
        if (xn <= delta) {
          /* Optimal step not too long, take it. */
          AXPY(n,  alpha, p, x);
          AXPY(n, -alpha, q, r);
          rho[0] = rho[1];
          rho[4] = xn;
          *state = (xn < delta ? OPK_CG_NEWX : OPK_CG_TRUNCATED);
          return;
        }
      }

      /* Operator A is not positive definite (P'.A.P < 0) or optimal step
         leads us ouside the trust region.  In these cases, we take a
         truncated step X + ALPHA*P along P so that |X + ALPHA*P| = DELTA with
         ALPHA >= 0.  This amounts to find the positive root of: A*ALPHA^2 +
         2*B*ALPHA + C with: A = |P|^2, B = X'.P, and C = |X|^2 - DELTA^2.
         Note that the reduced discriminant is D = B*B - A*C which expands to
         D = (DELTA^2 - |X|^2*sin(THETA)^2)*|P|^2 with THETA the angle between
         X and P, as DELTA > |X|, then D > 0 must hold. */
      a = DOT(n, p, p);
      if (a <= zero) {
        /* This can only occurs if P = 0 (and thus A = 0).  It is probably due
           to rounding errors and the algorithm should be restarted. */
        *state = OPK_CG_FINISH;
        return;
      }
      b = DOT(n, x, p);
      c = (rho[4] + delta)*(rho[4] - delta); /* RHO[4] = |X| */
      if (c >= zero) {
        /* This can only occurs if the caller has modified DELTA or RHO[4],
           which stores the norm of X, and thus is considered as an error. */
        *state = OPK_CG_ERROR;
        return;
      }
      /* Normalize the coefficients to avoid overflows. */
      d = FABS(a);
      if ((e = FABS(b)) > d) d = e;
      if ((e = FABS(c)) > d) d = e;
      d = one/d;
      a *= d;
      b *= d;
      c *= d;
      /* Compute the reduced discriminant. */
      d = b*b - a*c;
      if (d > zero) {
        /* The polynomial has two real roots of opposite signs (because C/A <
           0 by construction), ALPHA is the positive one.  Compute this root
           avoiding numerical errors (by adding numbers of same sign). */
        e = SQRT(d);
        if (b >= zero) {
          alpha = -c/(e + b);
        } else {
          alpha = (e - b)/a;
        }
      } else {
        /* D > 0 must hold.  There must be something wrong... */
        *state = OPK_CG_ERROR;
        return;
      }
      if (alpha > zero) {
        AXPY(n,  alpha, p, x);
        AXPY(n, -alpha, q, r);
      }
      rho[0] = rho[1];
      rho[2] = alpha; /* memorize optimal step size */
      rho[4] = delta;
      *state = OPK_CG_TRUNCATED;
      return;

    } else {

      /* Caller has been requested to compute q = A.x0
         Update the residuals. */
      for (i = 0; i < n; ++i) {
        r[i] -= q[i];
      }
      rho[2] = zero; /* ALPHA = 0 (no step has been taken yet) */
      rho[3] = zero; /* BETA = 0 (ditto) */

    }

    /* Variables X and residuals R available for inspection. */
    *state = OPK_CG_NEWX;
    return;


  case OPK_CG_FINISH:
  case OPK_CG_NON_CONVEX:
  case OPK_CG_TRUNCATED:

    /* The caller can restart the algorithm with final 'x', 'state' set to
       OPK_CG_RESTART and 'r' set to 'b' and, maybe, a larger 'delta'. */
    return;

  default:

    /* There must be something wrong... */
    *state = OPK_CG_ERROR;
    return;
  }

}