/* compute the norm of a vector in a manner that avoids overflows */ static LM_REAL VECNORM(LM_REAL *x, int n) { #ifdef HAVE_LAPACK #define NRM2 LM_MK_BLAS_NAME(nrm2) extern LM_REAL NRM2(int *n, LM_REAL *dx, int *incx); int one=1; return NRM2(&n, x, &one); #undef NRM2 #else // no LAPACK, use the simple method described by Blue in TOMS78 register int i; LM_REAL max, sum, tmp; for(i=n, max=0.0; i-->0; ) if(x[i]>max) max=x[i]; else if(x[i]<-max) max=-x[i]; for(i=n, sum=0.0; i-->0; ){ tmp=x[i]/max; sum+=tmp*tmp; } return max*(LM_REAL)sqrt(sum); #endif /* HAVE_LAPACK */ }
//========================================================================= double Epetra_SerialDenseVector::Norm2() const { // 2-norm of vector double result = NRM2(Length(), Values()); UpdateFlops(2*Length()); return(result); }
void minimize_dual(DOUBLE *Xopt, DOUBLE *Xorig, INT length, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \ DOUBLE c, INT N, INT K) { DOUBLE INTERV = 0.1; DOUBLE EXT = 3.0; INT MAX = 20; DOUBLE RATIO = (DOUBLE) 10; DOUBLE SIG = 0.1; DOUBLE RHO = SIG / (DOUBLE) 2; INT MN = K * 1; CHAR lamch_opt = 'U'; DOUBLE realmin = LAMCH(&lamch_opt); DOUBLE red = 1; INT i = 0; INT ls_failed = 0; DOUBLE f0; DOUBLE *df0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *dftemp = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *df3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *s = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE d0; INT derivFlag = 1; DOUBLE *X = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); datacpy(X, Xorig, MN); INT maxNK = IMAX(N, K); DOUBLE *SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); DOUBLE *tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); dual_obj_grad(&f0, df0, X, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); INT incx = 1; INT incy = 1; datacpy(s, df0, MN); DOUBLE alpha = -1; SCAL(&MN, &alpha, s, &incx); d0 = - DOT(&MN, s, &incx, s, &incy); DOUBLE x1; DOUBLE x2; DOUBLE x3; DOUBLE x4; DOUBLE *X0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE *X3 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); DOUBLE F0; DOUBLE *dF0 = (DOUBLE *) MALLOC(MN * sizeof(DOUBLE)); INT Mmin; DOUBLE f1; DOUBLE f2; DOUBLE f3; DOUBLE f4; DOUBLE d1; DOUBLE d2; DOUBLE d3; DOUBLE d4; INT success; DOUBLE A; DOUBLE B; DOUBLE sqrtquantity; DOUBLE tempnorm; DOUBLE tempinprod1; DOUBLE tempinprod2; DOUBLE tempscalefactor; x3 = red / (1 - d0); while (i++ < length) { datacpy(X0, X, MN); datacpy(dF0, df0, MN); F0 = f0; Mmin = MAX; while (1) { x2 = 0; f2 = f0; d2 = d0; f3 = f0; datacpy(df3, df0, MN); success = 0; while ((!success) && (Mmin > 0)) { Mmin = Mmin - 1; datacpy(X3, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X3, &incy); dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); if (ISNAN(f3) || ISINF(f3)) { /* any(isnan(df3)+isinf(df3)) */ x3 = (x2 + x3) * 0.5; } else { success = 1; } } if (f3 < F0) { datacpy(X0, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X0, &incy); datacpy(dF0, df3, MN); F0 = f3; } d3 = DOT(&MN, df3, &incx, s, &incy); if ((d3 > SIG * d0) || (f3 > f0 + x3 * RHO * d0) || (Mmin == 0)) { break; } x1 = x2; f1 = f2; d1 = d2; x2 = x3; f2 = f3; d2 = d3; A = 6 * (f1 - f2) + 3 * (d2 + d1) * (x2 - x1); B = 3 * (f2 - f1) - (2 * d1 + d2) * (x2 - x1); sqrtquantity = B * B - A * d1 * (x2 - x1); if (sqrtquantity < 0) { x3 = x2 * EXT; } else { x3 = x1 - d1 * SQR(x2 - x1) / (B + SQRT(sqrtquantity)); if (ISNAN(x3) || ISINF(x3) || (x3 < 0)) { x3 = x2 * EXT; } else if (x3 > x2 * EXT) { x3 = x2 * EXT; } else if (x3 < x2 + INTERV * (x2 - x1)) { x3 = x2 + INTERV * (x2 - x1); } } } while (((ABS(d3) > - SIG * d0) || (f3 > f0 + x3 * RHO * d0)) && (Mmin > 0)) { if ((d3 > 0) || (f3 > f0 + x3 * RHO * d0)) { x4 = x3; f4 = f3; d4 = d3; } else { x2 = x3; f2 = f3; d2 = d3; } if (f4 > f0) { x3 = x2 - (0.5 * d2 * SQR(x4 - x2)) / (f4 - f2 - d2 * (x4 - x2)); } else { A = 6 * (f2 - f4) / (x4 - x2) + 3 * (d4 + d2); B = 3 * (f4 - f2) - (2 * d2 + d4) * (x4 - x2); x3 = x2 + (SQRT(B * B - A * d2 * SQR(x4 - x2)) - B) / A; } if (ISNAN(x3) || ISINF(x3)) { x3 = (x2 + x4) * 0.5; } x3 = IMAX(IMIN(x3, x4 - INTERV * (x4 - x2)), x2 + INTERV * (x4 - x2)); datacpy(X3, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X3, &incy); dual_obj_grad(&f3, df3, X3, SSt, SXt, SXtXSt, trXXt, c, N, K, derivFlag, SStLambda, tempMatrix); if (f3 < F0) { datacpy(X0, X, MN); alpha = x3; AXPY(&MN, &alpha, s, &incx, X0, &incy); datacpy(dF0, df3, MN); F0 = f3; } Mmin = Mmin - 1; d3 = DOT(&MN, df3, &incx, s, &incy); } if ((ABS(d3) < - SIG * d0) && (f3 < f0 + x3 * RHO * d0)) { alpha = x3; AXPY(&MN, &alpha, s, &incx, X, &incy); f0 = f3; datacpy(dftemp, df3, MN); alpha = -1; AXPY(&MN, &alpha, df0, &incx, dftemp, &incy); tempinprod1 = DOT(&MN, dftemp, &incx, df3, &incy); tempnorm = NRM2(&MN, df0, &incx); tempinprod2 = SQR(tempnorm); tempscalefactor = tempinprod1 / tempinprod2; alpha = tempscalefactor; SCAL(&MN, &alpha, s, &incx); alpha = -1; AXPY(&MN, &alpha, df3, &incx, s, &incy); datacpy(df0, df3, MN); d3 = d0; d0 = DOT(&MN, df0, &incx, s, &incy); if (d0 > 0) { datacpy(s, df0, MN); alpha = -1; SCAL(&MN, &alpha, s, &incx); tempnorm = NRM2(&MN, s, &incx); d0 = - SQR(tempnorm); } x3 = x3 * IMIN(RATIO, d3 / (d0 - realmin)); ls_failed = 0; } else { datacpy(X, X0, MN); datacpy(df0, dF0, MN); f0 = F0; if ((ls_failed == 1) || (i > length)) { break; } datacpy(s, df0, MN); alpha = -1; SCAL(&MN, &alpha, s, &incx); tempnorm = NRM2(&MN, s, &incx); d0 = - SQR(tempnorm); x3 = 1 / (1 - d0); ls_failed = 1; } } datacpy(Xopt, X, MN); FREE(SStLambda); FREE(tempMatrix); FREE(df0); FREE(dftemp); FREE(df3); FREE(s); FREE(X); FREE(X0); FREE(X3); FREE(dF0); }
void dual_obj_grad(DOUBLE *obj, DOUBLE *deriv, DOUBLE *dualLambda, DOUBLE *SSt, DOUBLE *SXt, DOUBLE *SXtXSt, DOUBLE trXXt, \ DOUBLE c, INT N, INT K, INT derivFlag, DOUBLE *SStLambda, DOUBLE *tempMatrix) { INT maxNK = IMAX(N, K); INT SStLambdaFlag = 0; if (SStLambda == NULL) { SStLambda = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); SStLambdaFlag = 1; } INT tempMatrixFlag = 0; if (tempMatrix == NULL) { tempMatrix = (DOUBLE *) MALLOC(maxNK * K * sizeof(DOUBLE)); tempMatrixFlag = 1; } datacpy(SStLambda, SSt, K * K); INT iterK; /* #pragma omp parallel for private(iterK) shared(SStLambda, dualLambda, K) */ for (iterK = 0; iterK < K; ++iterK) { SStLambda[iterK * K + iterK] += dualLambda[iterK]; } CHAR uplo = 'U'; INT POTRSN = K; INT POTRSLDA = K; INT INFO; POTRF(&uplo, &POTRSN, SStLambda, &POTRSLDA, &INFO); datacpy(tempMatrix, SXtXSt, K * K); INT POTRSNRHS = K; INT POTRSLDB = K; POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO); DOUBLE objTemp = 0; /* #pragma omp parallel for private(iterK) shared(tempMatrix, K) reduction(-: objTemp) */ for (iterK = 0; iterK < K; ++iterK) { objTemp = objTemp - tempMatrix[iterK * K + iterK]; } INT ASUMN = K; INT incx = 1; DOUBLE sumDualLambda = ASUM(&ASUMN, dualLambda, &incx); objTemp += trXXt - c * sumDualLambda; *obj = - objTemp; if (derivFlag == 1) { datacpy(tempMatrix, SXt, K * N); POTRSNRHS = N; POTRSLDB = K; POTRS(&uplo, &POTRSN, &POTRSNRHS, SStLambda, &POTRSLDA, tempMatrix, &POTRSLDB, &INFO); transpose(tempMatrix, SStLambda, K, N); INT NRM2N = N; DOUBLE tempNorm; #pragma omp parallel for private(iterK, tempNorm) shared(SStLambda, deriv, K, c) for (iterK = 0; iterK < K; ++iterK) { tempNorm = NRM2(&NRM2N, &SStLambda[iterK * N], &incx); deriv[iterK] = - SQR(tempNorm) + c; } } if (SStLambdaFlag == 1) { FREE(SStLambda); } if (tempMatrixFlag == 1) { FREE(tempMatrix); } }
/* * ... RHO[0] = RHO_PREV * ... RHO[1] = RHO * ... RHO[2] = ALPHA * ... RHO[3] = BETA * ... RHO[4] = |X| */ void OPK_TRCG(const opk_index_t n, real_t p[], const real_t q[], real_t r[], real_t x[], const real_t z[], const real_t delta, real_t rho[5], opk_cg_state_t *state) { const real_t zero = OPK_REALCONST(0.0); const real_t one = OPK_REALCONST(1.0); real_t a, b, c, d, e, xn, sum, tmp; real_t pq, alpha, beta; long i; if (delta <= zero) { *state = OPK_CG_ERROR; return; } switch (*state) { case OPK_CG_START: /* Start with no initial guess: fill x with zeros, there is no needs to compute q = A.x0 since it is 0. */ ZERO(n, x); rho[0] = rho[1] = rho[2] = rho[3] = rho[4] = zero; *state = OPK_CG_NEWX; return; case OPK_CG_RESTART: /* Start or restart with initial x given: copy initial x into p and request caller to compte: q = A.p */ rho[0] = rho[1] = rho[2] = rho[3] = zero; xn = NRM2(n, x); if (xn >= delta) { if (xn > delta) { SCAL(n, delta/xn, x); } rho[4] = delta; /* |X| */ *state = OPK_CG_TRUNCATED; } else { rho[4] = xn; /* |X| */ COPY(n, x, p); *state = OPK_CG_AP; } return; case OPK_CG_NEWX: if (z == NULL) { /* No preconditioning. Take z = r and jump to next case to compute conjugate gradient direction. */ z = r; } else { /* Preconditioned version of the algorithm. Request caller to compute preconditioned residuals. */ *state = OPK_CG_PRECOND; return; } case OPK_CG_PRECOND: /* Caller has been requested to compute preconditioned residuals. Use conjugate gradients recurrence to compute next search direption p. */ rho[1] = DOT(n, r, z); if (rho[1] <= zero) { /* If r'.z too small, then algorithm has converged or preconditioner is not positive definite. */ *state = (rho[1] < zero ? OPK_CG_NON_CONVEX : OPK_CG_FINISH); return; } if (rho[0] > zero) { beta = rho[1]/rho[0]; for (i = 0; i < n; ++i) { p[i] = z[i] + beta*p[i]; } } else { beta = zero; COPY(n, z, p); } rho[3] = beta; /* Request caller to compute: q = A.p */ *state = OPK_CG_AP; return; case OPK_CG_AP: if (rho[1] > zero) { /* Caller has been requested to compute q = A.p. Compute optimal step size and update the variables and the residuals. */ pq = DOT(n, p, q); if (pq > zero) { alpha = rho[1]/pq; /* optimal step size */ rho[2] = alpha; /* memorize optimal step size */ if (alpha == zero) { /* If alpha too small, then algorithm has converged. */ *state = OPK_CG_FINISH; return; } sum = zero; for (i = 0; i < n; ++i) { tmp = x[i] + alpha*p[i]; sum += tmp*tmp; } xn = SQRT(sum); if (xn <= delta) { /* Optimal step not too long, take it. */ AXPY(n, alpha, p, x); AXPY(n, -alpha, q, r); rho[0] = rho[1]; rho[4] = xn; *state = (xn < delta ? OPK_CG_NEWX : OPK_CG_TRUNCATED); return; } } /* Operator A is not positive definite (P'.A.P < 0) or optimal step leads us ouside the trust region. In these cases, we take a truncated step X + ALPHA*P along P so that |X + ALPHA*P| = DELTA with ALPHA >= 0. This amounts to find the positive root of: A*ALPHA^2 + 2*B*ALPHA + C with: A = |P|^2, B = X'.P, and C = |X|^2 - DELTA^2. Note that the reduced discriminant is D = B*B - A*C which expands to D = (DELTA^2 - |X|^2*sin(THETA)^2)*|P|^2 with THETA the angle between X and P, as DELTA > |X|, then D > 0 must hold. */ a = DOT(n, p, p); if (a <= zero) { /* This can only occurs if P = 0 (and thus A = 0). It is probably due to rounding errors and the algorithm should be restarted. */ *state = OPK_CG_FINISH; return; } b = DOT(n, x, p); c = (rho[4] + delta)*(rho[4] - delta); /* RHO[4] = |X| */ if (c >= zero) { /* This can only occurs if the caller has modified DELTA or RHO[4], which stores the norm of X, and thus is considered as an error. */ *state = OPK_CG_ERROR; return; } /* Normalize the coefficients to avoid overflows. */ d = FABS(a); if ((e = FABS(b)) > d) d = e; if ((e = FABS(c)) > d) d = e; d = one/d; a *= d; b *= d; c *= d; /* Compute the reduced discriminant. */ d = b*b - a*c; if (d > zero) { /* The polynomial has two real roots of opposite signs (because C/A < 0 by construction), ALPHA is the positive one. Compute this root avoiding numerical errors (by adding numbers of same sign). */ e = SQRT(d); if (b >= zero) { alpha = -c/(e + b); } else { alpha = (e - b)/a; } } else { /* D > 0 must hold. There must be something wrong... */ *state = OPK_CG_ERROR; return; } if (alpha > zero) { AXPY(n, alpha, p, x); AXPY(n, -alpha, q, r); } rho[0] = rho[1]; rho[2] = alpha; /* memorize optimal step size */ rho[4] = delta; *state = OPK_CG_TRUNCATED; return; } else { /* Caller has been requested to compute q = A.x0 Update the residuals. */ for (i = 0; i < n; ++i) { r[i] -= q[i]; } rho[2] = zero; /* ALPHA = 0 (no step has been taken yet) */ rho[3] = zero; /* BETA = 0 (ditto) */ } /* Variables X and residuals R available for inspection. */ *state = OPK_CG_NEWX; return; case OPK_CG_FINISH: case OPK_CG_NON_CONVEX: case OPK_CG_TRUNCATED: /* The caller can restart the algorithm with final 'x', 'state' set to OPK_CG_RESTART and 'r' set to 'b' and, maybe, a larger 'delta'. */ return; default: /* There must be something wrong... */ *state = OPK_CG_ERROR; return; } }