/*
 * DLAMC4 -- service routine for DLAMC2.
 *
 * Starting from A = *start, A is repeatedly divided by *base for as long as
 * the previous A can still be recovered from the quotient, both by
 * multiplying back and by summing the quotient *base times.  The division
 * is carried out in two ways (A / base and A * (1 / base)) and both must
 * round-trip for the search to continue.
 *
 *   emin   (output)  set to 1 on entry and decremented once per successful
 *                    division step; on return it is the exponent estimate
 *                    reached when A could no longer be recovered.
 *   start  (input)   starting value for the search.
 *   base   (input)   base of the machine arithmetic.
 *
 * Always returns 0.
 *
 * All locals have automatic storage: each one is assigned before it is
 * read on every call, so nothing has to persist between calls, and keeping
 * them off static storage makes the routine reentrant.
 *
 * dlamc3_ is defined outside this block; it is called here with a zero
 * second operand, presumably to force the intermediate result to be stored
 * rather than kept in a register -- confirm against its definition.
 */
int dlamc4_(int *emin, double *start, int *base)
{
    extern double dlamc3_(double *, double *);

    double a, rbase, one, zero;
    double b1, b2, c1, c2, d1, d2;
    double d__1;
    int i;

    a = *start;
    one = 1.;
    rbase = one / *base;
    zero = 0.;
    *emin = 1;

    d__1 = a * rbase;
    b1 = dlamc3_(&d__1, &zero);
    c1 = a;
    c2 = a;
    d1 = a;
    d2 = a;

    /* WHILE( C1.EQ.A .AND. C2.EQ.A .AND. D1.EQ.A .AND. D2.EQ.A ) */
    while (c1 == a && c2 == a && d1 == a && d2 == a) {
        --(*emin);
        a = b1;

        /* Divide by the base, then try to get A back by multiplication ... */
        d__1 = a / *base;
        b1 = dlamc3_(&d__1, &zero);
        d__1 = b1 * *base;
        c1 = dlamc3_(&d__1, &zero);

        /* ... and by repeated addition. */
        d1 = zero;
        for (i = 1; i <= *base; ++i) {
            d1 += b1;
        }

        /* Same experiment using the precomputed reciprocal of the base. */
        d__1 = a * rbase;
        b2 = dlamc3_(&d__1, &zero);
        d__1 = b2 / rbase;
        c2 = dlamc3_(&d__1, &zero);

        d2 = zero;
        for (i = 1; i <= *base; ++i) {
            d2 += b2;
        }
    }

    return 0;
} /* dlamc4_ */
/*
 * DLAMC1 -- LAPACK auxiliary routine (version 1.1, October 31, 1992).
 *
 * Determines the machine parameters BETA, T, RND and IEEE1.
 *
 *   beta   (output)  the base of the machine.
 *   t      (output)  the number of (BETA) digits in the mantissa.
 *   rnd    (output)  TRUE_ if proper rounding, FALSE_ if chopping, appears
 *                    to occur in addition.  This may not be a reliable
 *                    guide to how the machine performs its arithmetic.
 *   ieee1  (output)  TRUE_ if rounding appears to be done in the IEEE
 *                    'round to nearest' style.
 *
 * Always returns 0.
 *
 * The probing is performed on the first call only; the results are kept in
 * static storage and simply copied out on every call.
 *
 * Based on the routine ENVRON by Malcolm, with suggestions by Gentleman
 * and Marovich:
 *   Malcolm M. A. (1972) Algorithms to reveal properties of floating-point
 *     arithmetic.  Comms. of the ACM, 15, 949-951.
 *   Gentleman W. M. and Marovich S. B. (1974) More on algorithms that
 *     reveal properties of floating point arithmetic units.  Comms. of the
 *     ACM, 17, 276-277.
 */
/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1)
{
    /* Saved between calls (Fortran SAVE FIRST, LIEEE1, LBETA, LRND, LT). */
    static logical first = TRUE_;
    static integer lbeta, lt;
    static logical lrnd, lieee1;

    extern doublereal dlamc3_(doublereal *, doublereal *);

    doublereal a, b, c, f, one, qtr, savec, t1, t2;
    doublereal neg_a, half_b, frac_b;   /* by-reference operands for dlamc3_ */

    if (first) {
        first = FALSE_;
        one = 1.;

        /* DLAMC3 is used throughout so that the relevant values are stored
           and not held in registers or altered by an optimizer. */

        /* Find a = 2.0**m, m the smallest positive integer for which
           fl( a + 1.0 ) = a. */
        a = 1.;
        c = 1.;
        while (c == one) {
            a *= 2;
            c = dlamc3_(&a, &one);
            neg_a = -a;
            c = dlamc3_(&c, &neg_a);
        }

        /* Find b = 2.0**m, m the smallest positive integer for which
           fl( a + b ) > a. */
        b = 1.;
        c = dlamc3_(&a, &b);
        while (c == a) {
            b *= 2;
            c = dlamc3_(&a, &b);
        }

        /* a and c are now neighbouring floating-point numbers, so their
           difference is the base.  A quarter is added before truncating
           so that the result is beta rather than beta - 1. */
        qtr = one / 4;
        savec = c;
        neg_a = -a;
        c = dlamc3_(&c, &neg_a);
        lbeta = (integer) (c + qtr);

        /* Rounding or chopping?  Add a bit less than beta/2, and then a
           bit more than beta/2, to a. */
        b = (doublereal) lbeta;

        half_b = b / 2;
        frac_b = -b / 100;
        f = dlamc3_(&half_b, &frac_b);
        c = dlamc3_(&f, &a);
        lrnd = (c == a) ? TRUE_ : FALSE_;

        half_b = b / 2;
        frac_b = b / 100;
        f = dlamc3_(&half_b, &frac_b);
        c = dlamc3_(&f, &a);
        if (lrnd && c == a) {
            lrnd = FALSE_;
        }

        /* IEEE 'round to nearest' check.  b/2 is half a unit in the last
           place of a and savec; a is even (last bit zero) and savec is
           odd, so adding b/2 should leave a unchanged but change savec. */
        half_b = b / 2;
        t1 = dlamc3_(&half_b, &a);
        half_b = b / 2;
        t2 = dlamc3_(&half_b, &savec);
        lieee1 = t1 == a && t2 > savec && lrnd;

        /* Mantissa length, found by powering rather than by a logarithm:
           t is the smallest positive integer for which
           fl( beta**t + 1.0 ) = 1.0. */
        lt = 0;
        a = 1.;
        c = 1.;
        while (c == one) {
            ++lt;
            a *= lbeta;
            c = dlamc3_(&a, &one);
            neg_a = -a;
            c = dlamc3_(&c, &neg_a);
        }
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *ieee1 = lieee1;
    return 0;
} /* dlamc1_ */
/*< SUBROUTINE DLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) >*/ /* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer i__; doublereal y, z__; integer try__, lexp; doublereal oldy=0; integer uexp, nbits; extern doublereal dlamc3_(doublereal *, doublereal *); doublereal recbas; integer exbits, expsum; /* -- LAPACK auxiliary routine (version 1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* October 31, 1992 */ /* .. Scalar Arguments .. */ /*< LOGICAL IEEE >*/ /*< INTEGER BETA, EMAX, EMIN, P >*/ /*< DOUBLE PRECISION RMAX >*/ /* .. */ /* Purpose */ /* ======= */ /* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. */ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) DOUBLE PRECISION */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. 
*/ /*< DOUBLE PRECISION ZERO, ONE >*/ /*< PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 ) >*/ /* .. */ /* .. Local Scalars .. */ /*< INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP >*/ /*< DOUBLE PRECISION OLDY, RECBAS, Y, Z >*/ /* .. */ /* .. External Functions .. */ /*< DOUBLE PRECISION DLAMC3 >*/ /*< EXTERNAL DLAMC3 >*/ /* .. */ /* .. Intrinsic Functions .. */ /*< INTRINSIC MOD >*/ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ /* approximately to the bound that is closest to abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ /*< LEXP = 1 >*/ lexp = 1; /*< EXBITS = 1 >*/ exbits = 1; /*< 10 CONTINUE >*/ L10: /*< TRY = LEXP*2 >*/ try__ = lexp << 1; /*< IF( TRY.LE.( -EMIN ) ) THEN >*/ if (try__ <= -(*emin)) { /*< LEXP = TRY >*/ lexp = try__; /*< EXBITS = EXBITS + 1 >*/ ++exbits; /*< GO TO 10 >*/ goto L10; /*< END IF >*/ } /*< IF( LEXP.EQ.-EMIN ) THEN >*/ if (lexp == -(*emin)) { /*< UEXP = LEXP >*/ uexp = lexp; /*< ELSE >*/ } else { /*< UEXP = TRY >*/ uexp = try__; /*< EXBITS = EXBITS + 1 >*/ ++exbits; /*< END IF >*/ } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ /*< IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN >*/ if (uexp + *emin > -lexp - *emin) { /*< EXPSUM = 2*LEXP >*/ expsum = lexp << 1; /*< ELSE >*/ } else { /*< EXPSUM = 2*UEXP >*/ expsum = uexp << 1; /*< END IF >*/ } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ /*< EMAX = EXPSUM + EMIN - 1 >*/ *emax = expsum + *emin - 1; /*< NBITS = 1 + EXBITS + P >*/ nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ /*< IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. 
( BETA.EQ.2 ) ) THEN >*/ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. */ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ /*< EMAX = EMAX - 1 >*/ --(*emax); /*< END IF >*/ } /*< IF( IEEE ) THEN >*/ if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ /*< EMAX = EMAX - 1 >*/ --(*emax); /*< END IF >*/ } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ /*< RECBAS = ONE / BETA >*/ recbas = 1. / *beta; /*< Z = BETA - ONE >*/ z__ = *beta - 1.; /*< Y = ZERO >*/ y = 0.; /*< DO 20 I = 1, P >*/ i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { /*< Z = Z*RECBAS >*/ z__ *= recbas; /*< >*/ if (y < 1.) { oldy = y; } /*< Y = DLAMC3( Y, Z ) >*/ y = dlamc3_(&y, &z__); /*< 20 CONTINUE >*/ /* L20: */ } /*< >*/ if (y >= 1.) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ /*< DO 30 I = 1, EMAX >*/ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { /*< Y = DLAMC3( Y*BETA, ZERO ) >*/ d__1 = y * *beta; y = dlamc3_(&d__1, &c_b32); /*< 30 CONTINUE >*/ /* L30: */ } /*< RMAX = Y >*/ *rmax = y; /*< RETURN >*/ return 0; /* End of DLAMC5 */ /*< END >*/ } /* dlamc5_ */
/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, integer *info) { /* System generated locals */ integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ integer i__, j; doublereal temp; extern doublereal dnrm2_(integer *, doublereal *, integer *); extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); /* -- LAPACK routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAED9 finds the roots of the secular equation, as defined by the */ /* values in D, Z, and RHO, between KSTART and KSTOP. It makes the */ /* appropriate calls to DLAED4 and then stores the new matrix of */ /* eigenvectors for use in calculating the next level of Z vectors. */ /* Arguments */ /* ========= */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved by */ /* DLAED4. K >= 0. */ /* KSTART (input) INTEGER */ /* KSTOP (input) INTEGER */ /* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP */ /* are to be computed. 1 <= KSTART <= KSTOP <= K. */ /* N (input) INTEGER */ /* The number of rows and columns in the Q matrix. */ /* N >= K (delation may result in N > K). */ /* D (output) DOUBLE PRECISION array, dimension (N) */ /* D(I) contains the updated eigenvalues */ /* for KSTART <= I <= KSTOP. 
*/ /* Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max( 1, N ). */ /* RHO (input) DOUBLE PRECISION */ /* The value of the parameter in the rank one update equation. */ /* RHO >= 0 required. */ /* DLAMDA (input) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* W (input) DOUBLE PRECISION array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating vector. */ /* S (output) DOUBLE PRECISION array, dimension (LDS, K) */ /* Will contain the eigenvectors of the repaired matrix which */ /* will be stored for subsequent Z vector calculation and */ /* multiplied by the previously accumulated eigenvectors */ /* to update the system. */ /* LDS (input) INTEGER */ /* The leading dimension of S. LDS >= max( 1, K ). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dlamda; --w; s_dim1 = *lds; s_offset = 1 + s_dim1; s -= s_offset; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*kstart < 1 || *kstart > max(1,*k)) { *info = -2; } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { *info = -3; } else if (*n < *k) { *info = -4; } else if (*ldq < max(1,*k)) { *info = -7; } else if (*lds < max(1,*k)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("DLAED9", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DLAMDA(I) if it is 1; this makes the subsequent */ /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DLAMDA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DLAMDA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *kstop; for (j = *kstart; j <= i__1; ++j) { dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. 
*/ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1 || *k == 2) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *k; for (j = 1; j <= i__2; ++j) { s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; /* L30: */ } /* L40: */ } goto L120; } /* Compute updated W. */ dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L50: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L60: */ } /* L70: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = sqrt(-w[i__]); w[i__] = d_sign(&d__1, &s[i__ + s_dim1]); /* L80: */ } /* Compute eigenvectors of the modified rank-1 modification. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; /* L90: */ } temp = dnrm2_(k, &q[j * q_dim1 + 1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; /* L100: */ } /* L110: */ } L120: return 0; /* End of DLAED9 */ } /* dlaed9_ */
/* Subroutine */ int dlaed9_(integer *k, integer *kstart, integer *kstop, integer *n, doublereal *d__, doublereal *q, integer *ldq, doublereal * rho, doublereal *dlamda, doublereal *w, doublereal *s, integer *lds, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= DLAED9 finds the roots of the secular equation, as defined by the values in D, Z, and RHO, between KSTART and KSTOP. It makes the appropriate calls to DLAED4 and then stores the new matrix of eigenvectors for use in calculating the next level of Z vectors. Arguments ========= K (input) INTEGER The number of terms in the rational function to be solved by DLAED4. K >= 0. KSTART (input) INTEGER KSTOP (input) INTEGER The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP are to be computed. 1 <= KSTART <= KSTOP <= K. N (input) INTEGER The number of rows and columns in the Q matrix. N >= K (delation may result in N > K). D (output) DOUBLE PRECISION array, dimension (N) D(I) contains the updated eigenvalues for KSTART <= I <= KSTOP. Q (workspace) DOUBLE PRECISION array, dimension (LDQ,N) LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max( 1, N ). RHO (input) DOUBLE PRECISION The value of the parameter in the rank one update equation. RHO >= 0 required. DLAMDA (input) DOUBLE PRECISION array, dimension (K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. W (input) DOUBLE PRECISION array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating vector. S (output) DOUBLE PRECISION array, dimension (LDS, K) Will contain the eigenvectors of the repaired matrix which will be stored for subsequent Z vector calculation and multiplied by the previously accumulated eigenvectors to update the system. 
LDS (input) INTEGER The leading dimension of S. LDS >= max( 1, K ). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an eigenvalue did not converge Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; doublereal d__1; /* Builtin functions */ double sqrt(doublereal), d_sign(doublereal *, doublereal *); /* Local variables */ static doublereal temp; extern doublereal dnrm2_(integer *, doublereal *, integer *); static integer i__, j; extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *), dlaed4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dlamc3_(doublereal *, doublereal *); extern /* Subroutine */ int xerbla_(char *, integer *); #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1] --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --dlamda; --w; s_dim1 = *lds; s_offset = 1 + s_dim1 * 1; s -= s_offset; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*kstart < 1 || *kstart > max(1,*k)) { *info = -2; } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { *info = -3; } else if (*n < *k) { *info = -4; } else if (*ldq < max(1,*k)) { *info = -7; } else if (*lds < max(1,*k)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("DLAED9", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can be computed with high relative accuracy (barring over/underflow). 
This is a problem on machines without a guard digit in add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), which on any of these machines zeros out the bottommost bit of DLAMDA(I) if it is 1; this makes the subsequent subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation occurs. On binary machines with a guard digit (almost all machines) it does not change DLAMDA(I) at all. On hexadecimal and decimal machines with a guard digit, it slightly changes the bottommost bits of DLAMDA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute 2*DLAMBDA(I) to prevent optimizing compilers from eliminating this code. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *kstop; for (j = *kstart; j <= i__1; ++j) { dlaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1 || *k == 2) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *k; for (j = 1; j <= i__2; ++j) { s_ref(j, i__) = q_ref(j, i__); /* L30: */ } /* L40: */ } goto L120; } /* Compute updated W. */ dcopy_(k, &w[1], &c__1, &s[s_offset], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L50: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L60: */ } /* L70: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = sqrt(-w[i__]); w[i__] = d_sign(&d__1, &s_ref(i__, 1)); /* L80: */ } /* Compute eigenvectors of the modified rank-1 modification. 
*/ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { q_ref(i__, j) = w[i__] / q_ref(i__, j); /* L90: */ } temp = dnrm2_(k, &q_ref(1, j), &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s_ref(i__, j) = q_ref(i__, j) / temp; /* L100: */ } /* L110: */ } L120: return 0; /* End of DLAED9 */ } /* dlaed9_ */
/*
 * DLASD8 -- LAPACK auxiliary routine (version 3.1, November 2006).
 *
 * Finds the square roots of the roots of the secular equation, as defined
 * by the values in DSIGMA and Z.  It makes the appropriate calls to DLASD4
 * and stores, for each element in D, the distance to its two nearest poles
 * (elements in DSIGMA).  It also updates the arrays VF and VL, the first
 * and last components of all the right singular vectors of the original
 * bidiagonal matrix.  DLASD8 is called from DLASD6.
 *
 *   icompq  (input)   = 0: compute singular values only.
 *                     = 1: compute singular vectors in factored form too.
 *   k       (input)   number of terms in the rational function to be
 *                     solved by DLASD4.  K >= 1.
 *   d__     (output)  array (K); the updated singular values.
 *   z__     (input)   array (K); components of the deflation-adjusted
 *                     updating row vector.  Normalized and then
 *                     overwritten by this routine.
 *   vf      (in/out)  array (K); on exit the first K components of the
 *                     first components of all right singular vectors of
 *                     the bidiagonal matrix.
 *   vl      (in/out)  array (K); on exit the first K components of the
 *                     last components of all right singular vectors of
 *                     the bidiagonal matrix.
 *   difl    (output)  array (K); DIFL(I) = D(I) - DSIGMA(I).
 *   difr    (output)  array (LDDIFR,2) if ICOMPQ = 1, (K) if ICOMPQ = 0.
 *                     DIFR(I,1) = D(I) - DSIGMA(I+1); DIFR(K,1) is not
 *                     defined.  If ICOMPQ = 1, DIFR(1:K,2) holds the
 *                     normalizing factors for the right singular vector
 *                     matrix.
 *   lddifr  (input)   leading dimension of DIFR, at least K.
 *   dsigma  (input)   array (K); the old roots of the deflated updating
 *                     problem, i.e. the poles of the secular equation.
 *   work    (workspace) array of dimension at least 3 * K.
 *   info    (output)  = 0: successful exit.
 *                     < 0: INFO = -i, the i-th argument was illegal.
 *                     > 0: INFO = 1, a singular value did not converge.
 *
 * Always returns 0; errors are reported through INFO.
 *
 * Based on contributions by Ming Gu and Huan Ren, Computer Science
 * Division, University of California at Berkeley, USA.
 *
 * c__0, c__1 and c_b8 are file-level constants defined outside this block.
 */
/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal *work, integer *info)
{
    /* Builtin functions */
    double sqrt(doublereal), d_sign(doublereal *, doublereal *);

    /* External routines */
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *), dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);

    /* Locals */
    integer difr_dim1, difr_offset;
    integer i, j, outer_end, inner_end, argpos;
    integer iwk1, iwk2, iwk3, iwk2i, iwk3i;
    doublereal rho, temp, dj, diflj, difrj, dsigj, dsigjp;
    doublereal mag, root;

    /* Parameter adjustments: shift the pointers so that the Fortran
       1-based indices can be used unchanged. */
    --d__;
    --z__;
    --vf;
    --vl;
    --difl;
    difr_dim1 = *lddifr;
    difr_offset = 1 + difr_dim1;
    difr -= difr_offset;
    --dsigma;
    --work;

    /* Test the input parameters. */
    *info = 0;
    if (*icompq < 0 || *icompq > 1) {
        *info = -1;
    } else if (*k < 1) {
        *info = -2;
    } else if (*lddifr < *k) {
        *info = -9;
    }
    if (*info != 0) {
        argpos = -(*info);
        xerbla_("DLASD8", &argpos);
        return 0;
    }

    /* Quick return if possible */
    if (*k == 1) {
        d__[1] = abs(z__[1]);
        difl[1] = d__[1];
        if (*icompq == 1) {
            /* NOTE(review): DIFL is documented with dimension (K) and K is
               1 here, so DIFL(2) lies beyond the documented extent --
               confirm that callers always provide the extra element. */
            difl[2] = 1.;
            difr[(difr_dim1 << 1) + 1] = 1.;
        }
        return 0;
    }

    /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
       be computed with high relative accuracy (barring over/underflow).
       This matters on machines without a guard digit in add/subtract
       (Cray XMP, Cray YMP, Cray C 90 and Cray 2).  DSIGMA(I) is replaced
       by 2*DSIGMA(I)-DSIGMA(I), which on those machines zeros out the
       bottommost bit if it is 1; on binary machines with a guard digit it
       does not change DSIGMA(I) at all.  The doubling goes through a
       subroutine call so that optimizing compilers do not eliminate it. */
    outer_end = *k;
    for (i = 1; i <= outer_end; ++i) {
        dsigma[i] = dlamc3_(&dsigma[i], &dsigma[i]) - dsigma[i];
    }

    /* Book keeping: WORK is used as three consecutive length-K segments
       starting at IWK1, IWK2 and IWK3. */
    iwk1 = 1;
    iwk2 = iwk1 + *k;
    iwk3 = iwk2 + *k;
    iwk2i = iwk2 - 1;
    iwk3i = iwk3 - 1;

    /* Normalize Z. */
    rho = dnrm2_(k, &z__[1], &c__1);
    dlascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info);
    rho *= rho;

    /* Initialize WORK(IWK3). */
    dlaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k);

    /* Compute the updated singular values, the arrays DIFL, DIFR,
       and the updated Z. */
    outer_end = *k;
    for (j = 1; j <= outer_end; ++j) {
        dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[iwk2], info);

        /* If the root finder fails, the computation is terminated. */
        if (*info != 0) {
            return 0;
        }

        work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
        difl[j] = -work[j];
        difr[j + difr_dim1] = -work[j + 1];

        /* Every index other than j picks up the same form of factor. */
        inner_end = *k;
        for (i = 1; i <= inner_end; ++i) {
            if (i == j) {
                continue;
            }
            work[iwk3i + i] = work[iwk3i + i] * work[i] * work[iwk2i + i] / (dsigma[i] - dsigma[j]) / (dsigma[i] + dsigma[j]);
        }
    }

    /* Compute updated Z: magnitude from WORK(IWK3...), sign from old Z. */
    outer_end = *k;
    for (i = 1; i <= outer_end; ++i) {
        mag = work[iwk3i + i];
        root = sqrt(abs(mag));
        z__[i] = d_sign(&root, &z__[i]);
    }

    /* Update VF and VL. */
    outer_end = *k;
    for (j = 1; j <= outer_end; ++j) {
        diflj = difl[j];
        dj = d__[j];
        dsigj = -dsigma[j];
        if (j < *k) {
            /* Only read by the i > j loop below, which is empty for j = K. */
            difrj = -difr[j + difr_dim1];
            dsigjp = -dsigma[j + 1];
        }

        work[j] = -z__[j] / diflj / (dsigma[j] + dj);

        inner_end = j - 1;
        for (i = 1; i <= inner_end; ++i) {
            work[i] = z__[i] / (dlamc3_(&dsigma[i], &dsigj) - diflj) / (dsigma[i] + dj);
        }

        inner_end = *k;
        for (i = j + 1; i <= inner_end; ++i) {
            work[i] = z__[i] / (dlamc3_(&dsigma[i], &dsigjp) + difrj) / (dsigma[i] + dj);
        }

        temp = dnrm2_(k, &work[1], &c__1);
        work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
        work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
        if (*icompq == 1) {
            difr[j + (difr_dim1 << 1)] = temp;
        }
    }

    dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
    dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);

    return 0;

/*     End of DLASD8 */

} /* dlasd8_ */
/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base)
{
/*  -- LAPACK auxiliary routine (version 3.1) -- */

/*  Purpose */
/*  ======= */

/*  DLAMC4 is a service routine for DLAMC2.  Beginning with A = START it */
/*  repeatedly divides A by BASE and decrements EMIN, stopping as soon as */
/*  the previous A can no longer be recovered from the reduced value. */

/*  Arguments */
/*  ========= */

/*  EMIN    (output) INTEGER */
/*          The minimum exponent before (gradual) underflow. */

/*  START   (input) DOUBLE PRECISION */
/*          The starting point for determining EMIN. */

/*  BASE    (input) INTEGER */
/*          The base of the machine. */

/*  Every intermediate product/quotient is routed through DLAMC3 (adding */
/*  zero) so that it is produced by a real call rather than folded away. */

    extern doublereal dlamc3_(doublereal *, doublereal *);

    doublereal nought = 0.;
    doublereal inv_base = 1. / *base;
    doublereal cur = *start;
    doublereal arg;
    /* "1" values use division by BASE, "2" values use inv_base. */
    doublereal down1, down2;	/* cur reduced by one power of BASE      */
    doublereal up1, up2;	/* down value scaled back up by BASE     */
    doublereal sum1, sum2;	/* down value added to itself BASE times */
    integer remaining;

    *emin = 1;
    arg = cur * inv_base;
    down1 = dlamc3_(&arg, &nought);
    up1 = cur;
    up2 = cur;
    sum1 = cur;
    sum2 = cur;

    /* Continue while all four reconstructions still reproduce cur. */
    while (up1 == cur && up2 == cur && sum1 == cur && sum2 == cur) {
	--(*emin);
	cur = down1;

	/* Reduction by true division, then the two ways back up. */
	arg = cur / *base;
	down1 = dlamc3_(&arg, &nought);
	arg = down1 * *base;
	up1 = dlamc3_(&arg, &nought);
	sum1 = nought;
	for (remaining = *base; remaining >= 1; --remaining) {
	    sum1 += down1;
	}

	/* Reduction by multiplying with 1/BASE, then the two ways back up. */
	arg = cur * inv_base;
	down2 = dlamc3_(&arg, &nought);
	arg = down2 / inv_base;
	up2 = dlamc3_(&arg, &nought);
	sum2 = nought;
	for (remaining = *base; remaining >= 1; --remaining) {
	    sum2 += down2;
	}
    }

    return 0;

/*     End of DLAMC4 */

} /* dlamc4_ */
/* Subroutine */ int dlamc1_(integer *beta, integer *t, logical *rnd, logical
	*ieee1)
{
/*  -- LAPACK auxiliary routine (version 3.1) -- */

/*  Purpose */
/*  ======= */

/*  DLAMC1 determines the machine parameters given by BETA, T, RND, and */
/*  IEEE1.  The probing is done on the first call only; the results are */
/*  kept in static storage and simply copied out on later calls. */

/*  Arguments */
/*  ========= */

/*  BETA    (output) INTEGER */
/*          The base of the machine. */

/*  T       (output) INTEGER */
/*          The number of ( BETA ) digits in the mantissa. */

/*  RND     (output) LOGICAL */
/*          TRUE if proper rounding appears to occur in addition, FALSE */
/*          if chopping does.  Not necessarily a reliable guide to how */
/*          the machine performs its arithmetic. */

/*  IEEE1   (output) LOGICAL */
/*          Whether rounding appears to be done in the IEEE */
/*          'round to nearest' style. */

/*  Further Details */
/*  =============== */

/*  Based on the routine ENVRON by Malcolm, with suggestions by Gentleman */
/*  and Marovich:  Malcolm M. A. (1972), Comms. of the ACM, 15, 949-951; */
/*  Gentleman W. M. and Marovich S. B. (1974), Comms. of the ACM, 17, */
/*  276-277. */

    /* Cached results of the first call. */
    static logical need_probe = TRUE_;
    static integer cached_beta;
    static integer cached_t;
    static logical cached_rnd;
    static logical cached_ieee1;

    extern doublereal dlamc3_(doublereal *, doublereal *);

    if (need_probe) {
	doublereal unit = 1.;
	doublereal quarter;
	doublereal big, step, diff, neg, saved;
	doublereal base_d, half_b, frac, probe, r1, r2;

	/* All sums go through DLAMC3 so that the relevant values are */
	/* stored and not held in registers or altered by optimizers. */

	/* Find big = 2.0**m, m the smallest positive integer with */
	/* fl( big + 1.0 ) = big. */
	big = 1.;
	diff = 1.;
	while (diff == unit) {
	    big *= 2;
	    diff = dlamc3_(&big, &unit);
	    neg = -big;
	    diff = dlamc3_(&diff, &neg);
	}

	/* Find step = 2.0**m, m the smallest positive integer with */
	/* fl( big + step ) .gt. big. */
	step = 1.;
	diff = dlamc3_(&big, &step);
	while (diff == big) {
	    step *= 2;
	    diff = dlamc3_(&big, &step);
	}

	/* big and diff are now neighbouring floating point numbers, so */
	/* their difference is the base.  A quarter is added before the */
	/* truncation so the result is beta and not ( beta - 1 ). */
	quarter = unit / 4;
	saved = diff;
	neg = -big;
	diff = dlamc3_(&diff, &neg);
	cached_beta = (integer) (diff + quarter);

	/* Rounding or chopping?  Add a bit less than beta/2 and then a */
	/* bit more than beta/2 to big. */
	base_d = (doublereal) cached_beta;
	half_b = base_d / 2;
	frac = -base_d / 100;
	probe = dlamc3_(&half_b, &frac);
	diff = dlamc3_(&probe, &big);
	cached_rnd = (diff == big) ? TRUE_ : FALSE_;

	half_b = base_d / 2;
	frac = base_d / 100;
	probe = dlamc3_(&half_b, &frac);
	diff = dlamc3_(&probe, &big);
	if (cached_rnd && diff == big) {
	    cached_rnd = FALSE_;
	}

	/* IEEE 'round to nearest' test: beta/2 is half a unit in the */
	/* last place of big (even) and saved (odd), so adding it should */
	/* leave big unchanged but should change saved. */
	half_b = base_d / 2;
	r1 = dlamc3_(&half_b, &big);
	half_b = base_d / 2;
	r2 = dlamc3_(&half_b, &saved);
	cached_ieee1 = r1 == big && r2 > saved && cached_rnd;

	/* Mantissa length, found by powering: count multiplications by */
	/* beta until adding 1.0 to the power is no longer recoverable. */
	cached_t = 0;
	big = 1.;
	diff = 1.;
	while (diff == unit) {
	    ++cached_t;
	    big *= cached_beta;
	    diff = dlamc3_(&big, &unit);
	    neg = -big;
	    diff = dlamc3_(&diff, &neg);
	}
    }

    *beta = cached_beta;
    *t = cached_t;
    *rnd = cached_rnd;
    *ieee1 = cached_ieee1;
    need_probe = FALSE_;
    return 0;

/*     End of DLAMC1 */

} /* dlamc1_ */
/*  -- LAPACK auxiliary routine (version 3.1), f2c translation -- */
/*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  DLASD3 finds all the square roots of the roots of the secular */
/*  equation, as defined by the values in D and Z.  It makes the */
/*  appropriate calls to DLASD4 and then updates the singular */
/*  vectors by matrix multiplication (DGEMM). */

/*  This code makes very mild assumptions about floating point */
/*  arithmetic. It will work on machines with a guard digit in */
/*  add/subtract, or on those binary machines without guard digits */
/*  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */
/*  It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  DLASD3 is called from DLASD1. */

/*  Arguments */
/*  ========= */

/*  NL     (input) INTEGER */
/*         The row dimension of the upper block.  NL >= 1. */

/*  NR     (input) INTEGER */
/*         The row dimension of the lower block.  NR >= 1. */

/*  SQRE   (input) INTEGER */
/*         = 0: the lower block is an NR-by-NR square matrix. */
/*         = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */
/*         The bidiagonal matrix has N = NL + NR + 1 rows and */
/*         M = N + SQRE >= N columns. */

/*  K      (input) INTEGER */
/*         The size of the secular equation, 1 <= K <= N. */

/*  D      (output) DOUBLE PRECISION array, dimension(K) */
/*         On exit the square roots of the roots of the secular equation, */
/*         in ascending order. */

/*  Q      (workspace) DOUBLE PRECISION array, */
/*                     dimension at least (LDQ,K). */
/*         Its first column temporarily holds a copy of Z. */

/*  LDQ    (input) INTEGER */
/*         The leading dimension of the array Q.  LDQ >= K. */

/*  DSIGMA (input) DOUBLE PRECISION array, dimension(K) */
/*         The first K elements of this array contain the old roots */
/*         of the deflated updating problem.  These are the poles */
/*         of the secular equation.  Note that the routine rewrites each */
/*         entry as (DSIGMA(I)+DSIGMA(I))-DSIGMA(I) (see below). */

/*  U      (output) DOUBLE PRECISION array, dimension (LDU, N) */
/*         The last N - K columns of this matrix contain the deflated */
/*         left singular vectors. */

/*  LDU    (input) INTEGER */
/*         The leading dimension of the array U.  LDU >= N. */

/*  U2     (input/output) DOUBLE PRECISION array, dimension (LDU2, N) */
/*         The first K columns of this matrix contain the non-deflated */
/*         left singular vectors for the split problem. */

/*  LDU2   (input) INTEGER */
/*         The leading dimension of the array U2.  LDU2 >= N. */

/*  VT     (output) DOUBLE PRECISION array, dimension (LDVT, M) */
/*         The last M - K columns of VT' contain the deflated */
/*         right singular vectors. */

/*  LDVT   (input) INTEGER */
/*         The leading dimension of the array VT.  The argument check */
/*         below requires LDVT >= M. */

/*  VT2    (input/output) DOUBLE PRECISION array */
/*         The first K columns of VT2' contain the non-deflated */
/*         right singular vectors for the split problem.  The code */
/*         below touches columns 1 .. M of this array. */

/*  LDVT2  (input) INTEGER */
/*         The leading dimension of the array VT2.  The argument check */
/*         below requires LDVT2 >= M. */

/*  IDXC   (input) INTEGER array, dimension ( N ) */
/*         The permutation used to arrange the columns of U (and rows of */
/*         VT) into three groups:  the first group contains non-zero */
/*         entries only at and above (or before) NL +1; the second */
/*         contains non-zero entries only at and below (or after) NL+2; */
/*         and the third is dense. The first column of U and the row of */
/*         VT are treated separately, however. */

/*         The rows of the singular vectors found by DLASD4 */
/*         must be likewise permuted before the matrix multiplies can */
/*         take place. */

/*  CTOT   (input) INTEGER array, dimension ( 4 ) */
/*         A count of the total number of the various types of columns */
/*         in U (or rows in VT), as described in IDXC. The fourth column */
/*         type is any column which has been deflated. */

/*  Z      (input) DOUBLE PRECISION array, dimension (K) */
/*         The first K elements of this array contain the components */
/*         of the deflation-adjusted updating row vector.  The routine */
/*         normalizes and then overwrites Z while it runs. */

/*  INFO   (output) INTEGER */
/*         = 0:  successful exit. */
/*         < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*         > 0:  if INFO = 1, a singular value did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Ming Gu and Huan Ren, Computer Science Division, University of */
/*     California at Berkeley, USA */

/*  NOTE(review): c__0, c__1, c_b13 and c_b26 are file-level constants */
/*  defined outside this block; presumably 0, 1, 1.0 and 0.0 - confirm. */

/*  ===================================================================== */

/* Subroutine */ int dlasd3_(integer *nl, integer *nr, integer *sqre, integer *k,
	doublereal *d__, doublereal *q, integer *ldq, doublereal *dsigma,
	doublereal *u, integer *ldu, doublereal *u2, integer *ldu2,
	doublereal *vt, integer *ldvt, doublereal *vt2, integer *ldvt2,
	integer *idxc, integer *ctot, doublereal *z__, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1,
	    vt_offset, vt2_dim1, vt2_offset, i__1, i__2;
    doublereal d__1, d__2;

    /* Builtin functions */
    double sqrt(doublereal), d_sign(doublereal *, doublereal *);

    /* Local variables */
    integer i__, j, m, n, jc;
    doublereal rho;
    integer nlp1, nlp2, nrp1;
    doublereal temp;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
	    integer *, doublereal *, doublereal *, integer *, doublereal *,
	    integer *, doublereal *, doublereal *, integer *);
    integer ctemp;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *,
	    doublereal *, integer *);
    integer ktemp;
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *,
	    doublereal *, doublereal *, doublereal *, doublereal *,
	    doublereal *, integer *), dlascl_(char *, integer *, integer *,
	    doublereal *, doublereal *, integer *, integer *, doublereal *,
	    integer *, integer *), dlacpy_(char *, integer *, integer *,
	    doublereal *, integer *, doublereal *, integer *),
	    xerbla_(char *, integer *);

    /* Parameter adjustments: shift every array pointer so that the */
    /* 1-based indexing x[i] and a[i + j * a_dim1] used below addresses */
    /* element (i) resp. (i,j) of the caller's column-major array. */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --dsigma;
    u_dim1 = *ldu;
    u_offset = 1 + u_dim1;
    u -= u_offset;
    u2_dim1 = *ldu2;
    u2_offset = 1 + u2_dim1;
    u2 -= u2_offset;
    vt_dim1 = *ldvt;
    vt_offset = 1 + vt_dim1;
    vt -= vt_offset;
    vt2_dim1 = *ldvt2;
    vt2_offset = 1 + vt2_dim1;
    vt2 -= vt2_offset;
    --idxc;
    --ctot;
    --z__;

    /* Function Body */

    /* Test the input parameters.  The checks are done in two separate */
    /* if-chains, so an error found by the second chain (K, LDQ, LDU, */
    /* LDU2, LDVT, LDVT2) replaces one already recorded by the first */
    /* chain (NL, NR, SQRE). */
    *info = 0;

    if (*nl < 1) {
	*info = -1;
    } else if (*nr < 1) {
	*info = -2;
    } else if (*sqre != 1 && *sqre != 0) {
	*info = -3;
    }

    n = *nl + *nr + 1;
    m = n + *sqre;
    nlp1 = *nl + 1;
    nlp2 = *nl + 2;

    if (*k < 1 || *k > n) {
	*info = -4;
    } else if (*ldq < *k) {
	*info = -7;
    } else if (*ldu < n) {
	*info = -10;
    } else if (*ldu2 < n) {
	*info = -12;
    } else if (*ldvt < m) {
	*info = -14;
    } else if (*ldvt2 < m) {
	*info = -16;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLASD3", &i__1);
	return 0;
    }

    /* Quick return if possible: for K = 1 the singular value is |Z(1)|, */
    /* row 1 of VT2 is copied to row 1 of VT, and column 1 of U is */
    /* column 1 of U2 with its sign flipped when Z(1) is not positive. */
    if (*k == 1) {
	d__[1] = abs(z__[1]);
	dcopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt);
	if (z__[1] > 0.) {
	    dcopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1);
	} else {
	    i__1 = n;
	    for (i__ = 1; i__ <= i__1; ++i__) {
		u[i__ + u_dim1] = -u2[i__ + u2_dim1];
/* L10: */
	    }
	}
	return 0;
    }

    /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */
    /* be computed with high relative accuracy (barring over/underflow). */
    /* This is a problem on machines without a guard digit in */
    /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
    /* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */
    /* which on any of these machines zeros out the bottommost */
    /* bit of DSIGMA(I) if it is 1; this makes the subsequent */
    /* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */
    /* occurs. On binary machines with a guard digit (almost all */
    /* machines) it does not change DSIGMA(I) at all. On hexadecimal */
    /* and decimal machines with a guard digit, it slightly */
    /* changes the bottommost bits of DSIGMA(I). It does not account */
    /* for hexadecimal or decimal machines without guard digits */
    /* (we know of none). We use a subroutine call to compute */
    /* 2*DSIGMA(I) to prevent optimizing compilers from eliminating */
    /* this code. */
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
/* L20: */
    }

    /* Keep a copy of Z in the first column of Q; its signs are reused */
    /* when the updated Z is formed below. */
    dcopy_(k, &z__[1], &c__1, &q[q_offset], &c__1);

    /* Normalize Z: RHO is its 2-norm here and is squared afterwards. */
    rho = dnrm2_(k, &z__[1], &c__1);
    dlascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info);
    rho *= rho;

    /* Find the new singular values.  Column J of U and of VT receive */
    /* the two work vectors produced by DLASD4 for the J-th root. */
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	dlasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j],
		&vt[j * vt_dim1 + 1], info);

	/* If the zero finder fails, the computation is terminated. */
	if (*info != 0) {
	    return 0;
	}
/* L30: */
    }

    /* Compute updated Z.  The magnitude comes from the product of the */
    /* DLASD4 work vectors; the sign is taken from the saved copy in Q. */
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1];
	i__2 = i__ - 1;
	for (j = 1; j <= i__2; ++j) {
	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
		    i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]);
/* L40: */
	}
	i__2 = *k - 1;
	for (j = i__; j <= i__2; ++j) {
	    z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[
		    i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]);
/* L50: */
	}
	d__2 = sqrt((d__1 = z__[i__], abs(d__1)));
	z__[i__] = d_sign(&d__2, &q[i__ + q_dim1]);
/* L60: */
    }

    /* Compute left singular vectors of the modified diagonal matrix, */
    /* and store related information for the right singular vectors. */
    /* Column I of U is normalized and written to column I of Q with */
    /* rows 2..K permuted through IDXC. */
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ *
		vt_dim1 + 1];
	u[i__ * u_dim1 + 1] = -1.;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__
		    * vt_dim1];
	    u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1];
/* L70: */
	}
	temp = dnrm2_(k, &u[i__ * u_dim1 + 1], &c__1);
	q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    jc = idxc[j];
	    q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp;
/* L80: */
	}
/* L90: */
    }

    /* Update the left singular vector matrix.  K = 2 uses one full */
    /* product; otherwise the column groups counted by CTOT are */
    /* multiplied separately, with a plain copy when groups 1 and 3 are */
    /* both empty. */
    if (*k == 2) {
	dgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset],
		ldq, &c_b26, &u[u_offset], ldu);
	goto L100;
    }
    if (ctot[1] > 0) {
	dgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1],
		ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu);
	if (ctot[3] > 0) {
	    ktemp = ctot[1] + 2 + ctot[2];
	    dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1]
		    , ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1],
		    ldu);
	}
    } else if (ctot[3] > 0) {
	ktemp = ctot[1] + 2 + ctot[2];
	dgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1],
		ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu);
    } else {
	dlacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu);
    }
    /* Row NL+1 of U is row 1 of Q; rows NL+2 onward come from the */
    /* lower part of U2 times the rows of Q for groups 2 and 3. */
    dcopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu);
    ktemp = ctot[1] + 2;
    ctemp = ctot[2] + ctot[3];
    dgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2,
	    &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu);

    /* Generate the right singular vectors: column I of VT is normalized */
    /* and stored as row I of Q, columns 2..K permuted through IDXC. */
L100:
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	temp = dnrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1);
	q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp;
	i__2 = *k;
	for (j = 2; j <= i__2; ++j) {
	    jc = idxc[j];
	    q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp;
/* L110: */
	}
/* L120: */
    }

    /* Update the right singular vector matrix. */
    if (*k == 2) {
	dgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset]
		, ldvt2, &c_b26, &vt[vt_offset], ldvt);
	return 0;
    }
    /* First NL+1 columns of VT. */
    ktemp = ctot[1] + 1;
    dgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[
	    vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt);
    ktemp = ctot[1] + 2 + ctot[2];
    /* The third-group contribution is added only when its first row */
    /* index lies within the leading dimension of VT2. */
    if (ktemp <= *ldvt2) {
	dgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1],
		ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1],
		ldvt);
    }

    /* Remaining NR+SQRE columns of VT.  When group 1 is non-empty, */
    /* column 1 of Q and row 1 of VT2 are first copied to position */
    /* CTOT(1)+1 so that one product covers the whole range. */
    ktemp = ctot[1] + 1;
    nrp1 = *nr + *sqre;
    if (ktemp > 1) {
	i__1 = *k;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    q[i__ + ktemp * q_dim1] = q[i__ + q_dim1];
/* L130: */
	}
	i__1 = m;
	for (i__ = nlp2; i__ <= i__1; ++i__) {
	    vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1];
/* L140: */
	}
    }
    ctemp = ctot[2] + 1 + ctot[3];
    dgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, &
	    vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 +
	    1], ldvt);

    return 0;

/*     End of DLASD3 */

} /* dlasd3_ */
/*  -- LAPACK routine (version 3.2), f2c translation -- */
/*     November 2006 */

/*  Purpose */
/*  ======= */

/*  DLAED3 finds the roots of the secular equation, as defined by the */
/*  values in D, W, and RHO, between 1 and K.  It makes the */
/*  appropriate calls to DLAED4 and then updates the eigenvectors by */
/*  multiplying the matrix of eigenvectors of the pair of eigensystems */
/*  being combined by the matrix of eigenvectors of the K-by-K system */
/*  which is solved here. */

/*  This code makes very mild assumptions about floating point */
/*  arithmetic. It will work on machines with a guard digit in */
/*  add/subtract, or on those binary machines without guard digits */
/*  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */
/*  It could conceivably fail on hexadecimal or decimal machines */
/*  without guard digits, but we know of none. */

/*  Arguments */
/*  ========= */

/*  K       (input) INTEGER */
/*          The number of terms in the rational function to be solved by */
/*          DLAED4.  K >= 0. */

/*  N       (input) INTEGER */
/*          The number of rows and columns in the Q matrix. */
/*          N >= K (deflation may result in N>K). */

/*  N1      (input) INTEGER */
/*          The location of the last eigenvalue in the leading submatrix. */
/*          min(1,N) <= N1 <= N/2.  (Not checked by this routine.) */

/*  D       (output) DOUBLE PRECISION array, dimension (N) */
/*          D(I) contains the updated eigenvalues for */
/*          1 <= I <= K. */

/*  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N) */
/*          Initially the first K columns are used as workspace. */
/*          On output the columns 1 to K contain */
/*          the updated eigenvectors. */

/*  LDQ     (input) INTEGER */
/*          The leading dimension of the array Q.  LDQ >= max(1,N). */

/*  RHO     (input) DOUBLE PRECISION */
/*          The value of the parameter in the rank one update equation. */
/*          RHO >= 0 required.  (Not checked by this routine.) */

/*  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K) */
/*          The first K elements of this array contain the old roots */
/*          of the deflated updating problem.  These are the poles */
/*          of the secular equation. May be changed on output by */
/*          having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */
/*          Cray-2, or Cray C-90, as described above. */

/*  Q2      (input) DOUBLE PRECISION array */
/*          The first K columns of this matrix contain the non-deflated */
/*          eigenvectors for the split problem.  It is addressed here as */
/*          a flat array: an N1-row block starting at Q2(1), followed by */
/*          an (N-N1)-row block starting at Q2(N1*(CTOT(1)+CTOT(2))+1). */

/*  INDX    (input) INTEGER array, dimension (N) */
/*          The permutation used to arrange the columns of the deflated */
/*          Q matrix into three groups (see DLAED2). */
/*          The rows of the eigenvectors found by DLAED4 must be likewise */
/*          permuted before the matrix multiply can take place. */

/*  CTOT    (input) INTEGER array, dimension (4) */
/*          A count of the total number of the various types of columns */
/*          in Q, as described in INDX.  The fourth column type is any */
/*          column which has been deflated. */

/*  W       (input/output) DOUBLE PRECISION array, dimension (K) */
/*          The first K elements of this array contain the components */
/*          of the deflation-adjusted updating vector. Destroyed on */
/*          output. */

/*  S       (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K */
/*          Will contain the eigenvectors of the repaired matrix which */
/*          will be multiplied by the previously accumulated eigenvectors */
/*          to update the system. */

/*  INFO    (output) INTEGER */
/*          = 0:  successful exit. */
/*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
/*          > 0:  if INFO = 1, an eigenvalue did not converge */

/*  Further Details */
/*  =============== */

/*  Based on contributions by */
/*     Jeff Rutter, Computer Science Division, University of California */
/*     at Berkeley, USA */
/*  Modified by Francoise Tisseur, University of Tennessee. */

/*  NOTE(review): c__1, c_b22 and c_b23 are file-level constants, and the */
/*  called routines are declared outside this block; c__1, c_b22, c_b23 */
/*  are presumably 1, 1.0 and 0.0 - confirm. */

/*  ===================================================================== */

/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1, doublereal *
	d__, doublereal *q, integer *ldq, doublereal *rho, doublereal *dlamda,
	doublereal *q2, integer *indx, integer *ctot, doublereal *w,
	doublereal *s, integer *info)
{
    /* System generated locals */
    integer q_dim1, q_offset, i__1, i__2;
    doublereal d__1;

    /* Local variables */
    integer i__, j, n2, n12, ii, n23, iq2;
    doublereal temp;

    /* Parameter adjustments: shift every array pointer so that the */
    /* 1-based indexing x[i] and q[i + j * q_dim1] used below addresses */
    /* element (i) resp. (i,j) of the caller's array. */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1;
    q -= q_offset;
    --dlamda;
    --q2;
    --indx;
    --ctot;
    --w;
    --s;

    /* Function Body */

    /* Test the input parameters (only K, N and LDQ are validated). */
    *info = 0;

    if (*k < 0) {
	*info = -1;
    } else if (*n < *k) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -6;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLAED3", &i__1);
	return 0;
    }

    /* Quick return if possible */
    if (*k == 0) {
	return 0;
    }

    /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */
    /* be computed with high relative accuracy (barring over/underflow). */
    /* This is a problem on machines without a guard digit in */
    /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */
    /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */
    /* which on any of these machines zeros out the bottommost */
    /* bit of DLAMDA(I) if it is 1; this makes the subsequent */
    /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */
    /* occurs. On binary machines with a guard digit (almost all */
    /* machines) it does not change DLAMDA(I) at all. On hexadecimal */
    /* and decimal machines with a guard digit, it slightly */
    /* changes the bottommost bits of DLAMDA(I). It does not account */
    /* for hexadecimal or decimal machines without guard digits */
    /* (we know of none). We use a subroutine call to compute */
    /* 2*DLAMDA(I) to prevent optimizing compilers from eliminating */
    /* this code. */
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dlamda[i__] = dlamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__];
    }

    /* Solve the secular equation for each root J; column J of Q */
    /* receives the work vector returned by DLAED4 and D(J) the root. */
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	dlaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j],
		info);

	/* If the zero finder fails, the computation is terminated. */
	if (*info != 0) {
	    goto L120;
	}
    }

    /* K = 1: nothing to permute or rescale, go straight to the update. */
    if (*k == 1) {
	goto L110;
    }
    /* K = 2: only permute the two rows of each column through INDX, */
    /* using W(1:2) as scratch. */
    if (*k == 2) {
	i__1 = *k;
	for (j = 1; j <= i__1; ++j) {
	    w[1] = q[j * q_dim1 + 1];
	    w[2] = q[j * q_dim1 + 2];
	    ii = indx[1];
	    q[j * q_dim1 + 1] = w[ii];
	    ii = indx[2];
	    q[j * q_dim1 + 2] = w[ii];
	}
	goto L110;
    }

    /* Compute updated W.  The original W is saved in S so that its */
    /* signs can be restored afterwards. */
    dcopy_(k, &w[1], &c__1, &s[1], &c__1);

    /* Initialize W(I) = Q(I,I) (stride LDQ+1 walks the diagonal). */
    i__1 = *ldq + 1;
    dcopy_(k, &q[q_offset], &i__1, &w[1], &c__1);
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
	}
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]);
	}
    }
    /* W(I) = sign( sqrt(-W(I)), S(I) ). */
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	d__1 = sqrt(-w[i__]);
	w[i__] = d_sign(&d__1, &s[i__]);
    }

    /* Compute eigenvectors of the modified rank-1 modification: each */
    /* column is formed in S, normalized by its 2-norm, and written */
    /* back to Q with its rows permuted through INDX. */
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    s[i__] = w[i__] / q[i__ + j * q_dim1];
	}
	temp = dnrm2_(k, &s[1], &c__1);
	i__2 = *k;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    ii = indx[i__];
	    q[i__ + j * q_dim1] = s[ii] / temp;
	}
    }

    /* Compute the updated eigenvectors.  The lower N-N1 rows are */
    /* produced first (groups 2 and 3), then the upper N1 rows (groups */
    /* 1 and 2); S holds the relevant rows of Q for each product.  When */
    /* a group pair is empty the corresponding rows of Q are filled */
    /* with c_b23 instead. */
L110:

    n2 = *n - *n1;
    n12 = ctot[1] + ctot[2];
    n23 = ctot[2] + ctot[3];

    dlacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23);
    iq2 = *n1 * n12 + 1;
    if (n23 != 0) {
	dgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, &
		c_b23, &q[*n1 + 1 + q_dim1], ldq);
    } else {
	dlaset_("A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq);
    }

    dlacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12);
    if (n12 != 0) {
	dgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23,
		&q[q_offset], ldq);
    } else {
	dlaset_("A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq);
    }

    /* Common exit; also reached directly when DLAED4 reports failure. */
L120:
    return 0;

/*     End of DLAED3 */

} /* dlaed3_ */
/*! \brief

<pre>
    Purpose
    =======

    DLAMC5 attempts to compute RMAX, the largest machine floating-point
    number, without overflow.  It assumes that EMAX + abs(EMIN) sum
    approximately to a power of 2.  It will fail on machines where this
    assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
    EMAX = 28718).  It will also fail if the value supplied for EMIN is
    too large (i.e. too close to zero), probably with overflow.

    Arguments
    =========

    BETA    (input) INT
            The base of floating-point arithmetic.

    P       (input) INT
            The number of base BETA digits in the mantissa of a
            floating-point value.

    EMIN    (input) INT
            The minimum exponent before (gradual) underflow.

    IEEE    (input) INT
            A flag specifying whether or not the arithmetic system is
            thought to comply with the IEEE standard.

    EMAX    (output) INT
            The largest exponent before overflow.

    RMAX    (output) DOUBLE PRECISION
            The largest machine floating-point number.

    Returns 0 always.

    Implementation note
    ===================

    All working variables are automatic (not static), so the routine
    keeps no state between calls and does not share scratch storage
    between concurrent callers.
   =====================================================================
</pre>
*/
int dlamc5_(int *beta, int *p, int *emin, int *ieee, int *emax, double *rmax)
{
    extern double dlamc3_(double *, double *);

    /* Snapshot of the inputs. */
    const int base = *beta;
    const int digits = *p;
    const int min_exp = *emin;
    const int is_ieee = *ieee;

    double zero = 0.;
    double recbas, y, z;
    double oldy = 0.;
    int lexp, uexp, try__, exbits, expsum, nbits, max_exp, i;

    /* First compute LEXP and UEXP, two powers of 2 that bound
       abs(EMIN).  We then assume that EMAX + abs(EMIN) will sum
       approximately to the bound that is closest to abs(EMIN).
       (EMAX is the exponent of the required number RMAX). */
    lexp = 1;
    exbits = 1;
    try__ = lexp << 1;
    while (try__ <= -min_exp) {
	lexp = try__;
	++exbits;
	try__ = lexp << 1;
    }
    if (lexp == -min_exp) {
	uexp = lexp;
    } else {
	uexp = try__;
	++exbits;
    }

    /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater
       than or equal to EMIN.  EXBITS is the number of bits needed to
       store the exponent. */
    if (uexp + min_exp > -lexp - min_exp) {
	expsum = lexp << 1;
    } else {
	expsum = uexp << 1;
    }

    /* EXPSUM is the exponent range, approximately equal to
       EMAX - EMIN + 1. */
    max_exp = expsum + min_exp - 1;

    /* NBITS is the total number of bits needed to store a
       floating-point number. */
    nbits = exbits + 1 + digits;

    if (nbits % 2 == 1 && base == 2) {
	/* Either there are an odd number of bits used to store a
	   floating-point number, which is unlikely, or some bits are
	   not used in the representation of numbers, which is possible
	   (e.g. Cray machines), or the mantissa has an implicit bit
	   (e.g. IEEE machines, Dec Vax machines), which is perhaps the
	   most likely.  We have to assume the last alternative.  If this
	   is true, then we need to reduce EMAX by one because there must
	   be some way of representing zero in an implicit-bit system.
	   On machines like Cray, we are reducing EMAX by one
	   unnecessarily. */
	--max_exp;
    }

    if (is_ieee) {
	/* Assume we are on an IEEE machine which reserves one exponent
	   for infinity and NaN. */
	--max_exp;
    }
    *emax = max_exp;

    /* Now create RMAX, the largest machine number, which should be
       equal to (1.0 - BETA**(-P)) * BETA**EMAX.

       First compute 1.0 - BETA**(-P), being careful that the result
       is less than 1.0.  OLDY remembers the last partial sum that was
       still below 1.0; the sums go through DLAMC3. */
    recbas = 1. / base;
    z = base - 1.;
    y = 0.;
    for (i = 1; i <= digits; ++i) {
	z *= recbas;
	if (y < 1.) {
	    oldy = y;
	}
	y = dlamc3_(&y, &z);
    }
    if (y >= 1.) {
	y = oldy;
    }

    /* Now multiply by BETA**EMAX to get RMAX, one factor at a time. */
    for (i = 1; i <= max_exp; ++i) {
	double scaled = y * base;
	y = dlamc3_(&scaled, &zero);
    }

    *rmax = y;
    return 0;

/*     End of DLAMC5 */

} /* dlamc5_ */
/*! \brief <pre>
    Purpose
    =======

    DLAMC2 determines the machine parameters specified in its argument
    list.

    The parameters are computed on the first call and kept in static
    storage; later calls only copy the stored values to the outputs.
    If the EMIN warning fires, the routine re-arms itself so that the
    computation (and the warning) is repeated on the next call.

    Arguments
    =========

    BETA    (output) INT
            The base of the machine.

    T       (output) INT
            The number of ( BETA ) digits in the mantissa.

    RND     (output) INT
            Specifies whether proper rounding  ( RND = .TRUE. )  or
            chopping  ( RND = .FALSE. )  occurs in addition. This may not
            be a reliable guide to the way in which the machine performs
            its arithmetic.

    EPS     (output) DOUBLE PRECISION
            The smallest positive number such that
               fl( 1.0 - EPS ) .LT. 1.0,
            where fl denotes the computed value.

    EMIN    (output) INT
            The minimum exponent before (gradual) underflow occurs.

    RMIN    (output) DOUBLE PRECISION
            The smallest normalized number for the machine, given by
            BASE**( EMIN - 1 ), where  BASE  is the floating point value
            of BETA.

    EMAX    (output) INT
            The maximum exponent before overflow occurs.

    RMAX    (output) DOUBLE PRECISION
            The largest positive number for the machine, given by
            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
            value of BETA.

    Returns 0 in all cases.

    Further Details
    ===============

    The computation of  EPS  is based on a routine PARANOIA by
    W. Kahan of the University of California at Berkeley.

   =====================================================================
</pre> */
int dlamc2_(int *beta, int *t, int *rnd, double *eps, int *emin,
            double *rmin, int *emax, double *rmax)
{
    /* Initialized data */
    static int first = TRUE_;
    static int iwarn = FALSE_;

    /* System generated locals */
    int i__1;
    double d__1, d__2, d__3, d__4, d__5;

    /* Builtin functions */
    double pow_di(double *, int *);

    /* Local variables */
    static int ieee;
    static double half;
    static int lrnd;
    static double leps, zero, a, b, c;
    static int i, lbeta;
    static double rbase;
    static int lemin, lemax, gnmin;
    static double small;
    static int gpmin;
    static double third, lrmin, lrmax, sixth;
    extern /* Subroutine */ int dlamc1_(int *, int *, int *, int *);
    extern double dlamc3_(double *, double *);
    static int lieee1;
    extern /* Subroutine */ int dlamc4_(int *, double *, int *),
            dlamc5_(int *, int *, int *, int *, int *, double *);
    static int lt, ngnmin, ngpmin;
    static double one, two;

    if (first) {
        first = FALSE_;
        zero = 0.;
        one = 1.;
        two = 2.;

        /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of
           BETA, T, RND, EPS, EMIN and RMIN.

           Throughout this routine we use the function DLAMC3 to ensure
           that relevant values are stored and not held in registers, or
           are not affected by optimizers.

           DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */
        dlamc1_(&lbeta, &lt, &lrnd, &lieee1);

        /* Start to find EPS: first guess is BETA**(-T). */
        b = (double) lbeta;
        i__1 = -lt;
        a = pow_di(&b, &i__1);
        leps = a;

        /* Try some tricks to see whether or not this is the correct EPS. */
        b = two / 3;
        half = one / 2;
        d__1 = -half;
        sixth = dlamc3_(&b, &d__1);
        third = dlamc3_(&sixth, &sixth);
        d__1 = -half;
        b = dlamc3_(&third, &d__1);
        b = dlamc3_(&b, &sixth);
        /* Floating-point absolute value, written out so the result does
           not depend on whether `abs` is a type-generic macro or the
           integer library function. */
        b = (b >= 0.) ? b : -b;
        if (b < leps) {
            b = leps;
        }

        leps = 1.;

        /* WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) ) LOOP */
        while (leps > b && b > zero) {
            leps = b;
            d__1 = half * leps;
            /* Computing 5th power of TWO */
            d__3 = two, d__4 = d__3, d__3 *= d__3;
            /* Computing 2nd power of LEPS */
            d__5 = leps;
            d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
            c = dlamc3_(&d__1, &d__2);
            d__1 = -c;
            c = dlamc3_(&half, &d__1);
            b = dlamc3_(&half, &c);
            d__1 = -b;
            c = dlamc3_(&half, &d__1);
            b = dlamc3_(&half, &c);
        }
        /* END WHILE */

        if (a < leps) {
            leps = a;
        }

        /* Computation of EPS complete.

           Now find EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
           Keep dividing A by BETA until (gradual) underflow occurs. This
           is detected when we cannot recover the previous A. */
        rbase = one / lbeta;
        small = one;
        for (i = 1; i <= 3; ++i) {
            d__1 = small * rbase;
            small = dlamc3_(&d__1, &zero);
        }
        a = dlamc3_(&one, &small);

        dlamc4_(&ngpmin, &one, &lbeta);
        d__1 = -one;
        dlamc4_(&ngnmin, &d__1, &lbeta);
        dlamc4_(&gpmin, &a, &lbeta);
        d__1 = -a;
        dlamc4_(&gnmin, &d__1, &lbeta);
        ieee = FALSE_;

        if (ngpmin == ngnmin && gpmin == gnmin) {
            if (ngpmin == gpmin) {
                /* ( Non twos-complement machines, no gradual underflow;
                     e.g., VAX ) */
                lemin = ngpmin;
            } else if (gpmin - ngpmin == 3) {
                /* ( Non twos-complement machines, with gradual underflow;
                     e.g., IEEE standard followers ) */
                lemin = ngpmin - 1 + lt;
                ieee = TRUE_;
            } else {
                /* ( A guess; no known machine ) */
                lemin = min(ngpmin,gpmin);
                iwarn = TRUE_;
            }
        } else if (ngpmin == gpmin && ngnmin == gnmin) {
            if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
                /* ( Twos-complement machines, no gradual underflow;
                     e.g., CYBER 205 ) */
                lemin = max(ngpmin,ngnmin);
            } else {
                /* ( A guess; no known machine ) */
                lemin = min(ngpmin,ngnmin);
                iwarn = TRUE_;
            }
        } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) {
            if (gpmin - min(ngpmin,ngnmin) == 3) {
                /* ( Twos-complement machines with gradual underflow;
                     no known machine ) */
                lemin = max(ngpmin,ngnmin) - 1 + lt;
            } else {
                /* ( A guess; no known machine ) */
                lemin = min(ngpmin,ngnmin);
                iwarn = TRUE_;
            }
        } else {
            /* Computing MIN of all four candidates.
               ( A guess; no known machine ) */
            i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
            lemin = min(i__1,gnmin);
            iwarn = TRUE_;
        }

        /* **
           Comment out this if block if EMIN is ok.
           Resetting FIRST makes the next call recompute everything. */
        if (iwarn) {
            first = TRUE_;
            printf("\n\n WARNING. The value EMIN may be incorrect:- ");
            printf("EMIN = %8i\n",lemin);
            printf("If, after inspection, the value EMIN looks acceptable ");
            printf("please comment out \n the IF block as marked within the ");
            printf("code of routine DLAMC2, \n otherwise supply EMIN ");
            printf("explicitly.\n");
        }
        /* **

           Assume IEEE arithmetic if we found denormalised numbers above,
           or if arithmetic seems to round in the IEEE style, determined
           in routine DLAMC1. A true IEEE machine should have both things
           true; however, faulty machines may have one or the other. */
        ieee = ieee || lieee1;

        /* Compute RMIN by successive division by BETA. We could compute
           RMIN as BASE**( EMIN - 1 ), but some machines underflow during
           this computation. */
        lrmin = 1.;
        i__1 = 1 - lemin;
        for (i = 1; i <= i__1; ++i) {
            d__1 = lrmin * rbase;
            lrmin = dlamc3_(&d__1, &zero);
        }

        /* Finally, call DLAMC5 to compute EMAX and RMAX. */
        dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *eps = leps;
    *emin = lemin;
    *rmin = lrmin;
    *emax = lemax;
    *rmax = lrmax;

    return 0;

    /* End of DLAMC2 */
} /* dlamc2_ */
/*! \brief <pre>
    Purpose
    =======

    DLAMC1 determines the machine parameters given by BETA, T, RND, and
    IEEE1.

    The probing is done once, on the first call; the results are held in
    static storage and simply copied out on every call.

    Arguments
    =========

    BETA    (output) INT
            The base of the machine.

    T       (output) INT
            The number of ( BETA ) digits in the mantissa.

    RND     (output) INT
            Specifies whether proper rounding  ( RND = .TRUE. )  or
            chopping  ( RND = .FALSE. )  occurs in addition. This may not
            be a reliable guide to the way in which the machine performs
            its arithmetic.

    IEEE1   (output) INT
            Specifies whether rounding appears to be done in the IEEE
            'round to nearest' style.

    Returns 0 in all cases.

    Further Details
    ===============

    The routine is based on the routine  ENVRON  by Malcolm and
    incorporates suggestions by Gentleman and Marovich. See

       Malcolm M. A. (1972) Algorithms to reveal properties of
          floating-point arithmetic. Comms. of the ACM, 15, 949-951.

       Gentleman W. M. and Marovich S. B. (1974) More on algorithms
          that reveal properties of floating point arithmetic units.
          Comms. of the ACM, 17, 276-277.

   =====================================================================
</pre> */
int dlamc1_(int *beta, int *t, int *rnd, int *ieee1)
{
    extern double dlamc3_(double *, double *);

    /* Run-once flag and the cached results. */
    static int first = TRUE_;
    static int lbeta;
    static int lt;
    static int lrnd;
    static int lieee1;

    /* Scratch values used while probing the arithmetic. */
    static double unity, quarter;
    static double big, step, probe, nudge;
    static double neighbour;
    static double even_sum, odd_sum;

    /* Temporaries whose addresses are handed to DLAMC3. */
    double arg1, arg2;

    if (first) {
        first = FALSE_;
        unity = 1.;

        /* Every sum below goes through DLAMC3 so that, per the routine's
           design, values are stored rather than held in registers or
           altered by optimizers.

           Find big = 2.0**m with the smallest positive integer m such
           that fl( big + 1.0 ) = big. */
        big = 1.;
        probe = 1.;
        while (probe == unity) {
            big *= 2;
            probe = dlamc3_(&big, &unity);
            arg1 = -big;
            probe = dlamc3_(&probe, &arg1);
        }

        /* Find step = 2.0**m with the smallest positive integer m such
           that fl( big + step ) .gt. big. */
        step = 1.;
        probe = dlamc3_(&big, &step);
        while (probe == big) {
            step *= 2;
            probe = dlamc3_(&big, &step);
        }

        /* big and probe are now neighbouring floating point numbers in
           the interval ( beta**t, beta**( t + 1 ) ), so their difference
           is beta. A quarter is added before truncation so the result
           is beta and not ( beta - 1 ). */
        quarter = unity / 4;
        neighbour = probe;
        arg1 = -big;
        probe = dlamc3_(&probe, &arg1);
        lbeta = (int) (probe + quarter);

        /* Decide between rounding and chopping by adding a bit less
           than beta/2 and then a bit more than beta/2 to big. */
        step = (double) lbeta;

        arg1 = step / 2;
        arg2 = -step / 100;
        nudge = dlamc3_(&arg1, &arg2);
        probe = dlamc3_(&nudge, &big);
        lrnd = (probe == big) ? TRUE_ : FALSE_;

        arg1 = step / 2;
        arg2 = step / 100;
        nudge = dlamc3_(&arg1, &arg2);
        probe = dlamc3_(&nudge, &big);
        if (lrnd && probe == big) {
            lrnd = FALSE_;
        }

        /* Try to decide whether rounding is IEEE 'round to nearest'.
           beta/2 is half a unit in the last place of both big and
           neighbour. big is even (last bit zero) and neighbour is odd,
           so adding beta/2 should leave big unchanged but should change
           neighbour. */
        arg1 = step / 2;
        even_sum = dlamc3_(&arg1, &big);
        arg1 = step / 2;
        odd_sum = dlamc3_(&arg1, &neighbour);
        lieee1 = even_sum == big && odd_sum > neighbour && lrnd;

        /* Find the mantissa length t by powering: the smallest positive
           integer for which fl( beta**t + 1.0 ) = 1.0 (safer than
           taking a logarithm of big). */
        lt = 0;
        big = 1.;
        probe = 1.;
        while (probe == unity) {
            ++lt;
            big *= lbeta;
            probe = dlamc3_(&big, &unity);
            arg1 = -big;
            probe = dlamc3_(&probe, &arg1);
        }
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *ieee1 = lieee1;
    return 0;

    /* End of DLAMC1 */
} /* dlamc1_ */
/*< SUBROUTINE DLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) >*/
/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd,
        doublereal *eps, integer *emin, doublereal *rmin, integer *emax,
        doublereal *rmax)
{
    /* Initialized data */
    static logical first = TRUE_;   /* cleared after the first full run */
    static logical iwarn = FALSE_;  /* set when EMIN had to be guessed */

    /* System generated locals */
    integer i__1;
    doublereal d__1, d__2, d__3, d__4, d__5;

    /* Builtin functions */
    double pow_di(doublereal *, integer *);

    /* Local variables */
    doublereal a, b, c__;
    integer i__;
    static integer lt;
    doublereal one, two;
    logical ieee;
    doublereal half;
    /* LRND is copied to RND on every call, so it has to persist between
       calls just like the other cached results. */
    static logical lrnd = FALSE_;
    static doublereal leps;
    doublereal zero;
    static integer lbeta;
    doublereal rbase;
    static integer lemin, lemax;
    integer gnmin;
    doublereal small;
    integer gpmin;
    doublereal third;
    static doublereal lrmin, lrmax;
    doublereal sixth;
    extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *,
            logical *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    logical lieee1;
    extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *),
            dlamc5_(integer *, integer *, integer *, logical *, integer *,
            doublereal *);
    integer ngnmin, ngpmin;

/*  -- LAPACK auxiliary routine (version 1.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     October 31, 1992 */

/*  Purpose */
/*  ======= */

/*  DLAMC2 determines the machine parameters specified in its argument */
/*  list. */

/*  The parameters are computed on the first call and cached in static */
/*  storage; later calls only copy the cached values to the outputs. */
/*  If the EMIN warning fires, FIRST is set again so the computation */
/*  (and the warning) is repeated on the next call. */

/*  Arguments */
/*  ========= */

/*  BETA    (output) INTEGER */
/*          The base of the machine. */

/*  T       (output) INTEGER */
/*          The number of ( BETA ) digits in the mantissa. */

/*  RND     (output) LOGICAL */
/*          Specifies whether proper rounding  ( RND = .TRUE. )  or */
/*          chopping  ( RND = .FALSE. )  occurs in addition. This may not */
/*          be a reliable guide to the way in which the machine performs */
/*          its arithmetic. */

/*  EPS     (output) DOUBLE PRECISION */
/*          The smallest positive number such that */
/*             fl( 1.0 - EPS ) .LT. 1.0, */
/*          where fl denotes the computed value. */

/*  EMIN    (output) INTEGER */
/*          The minimum exponent before (gradual) underflow occurs. */

/*  RMIN    (output) DOUBLE PRECISION */
/*          The smallest normalized number for the machine, given by */
/*          BASE**( EMIN - 1 ), where  BASE  is the floating point value */
/*          of BETA. */

/*  EMAX    (output) INTEGER */
/*          The maximum exponent before overflow occurs. */

/*  RMAX    (output) DOUBLE PRECISION */
/*          The largest positive number for the machine, given by */
/*          BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point */
/*          value of BETA. */

/*  Returns 0 in all cases. */

/*  Further Details */
/*  =============== */

/*  The computation of  EPS  is based on a routine PARANOIA by */
/*  W. Kahan of the University of California at Berkeley. */

/* ===================================================================== */

    /*< IF( FIRST ) THEN >*/
    if (first) {
        first = FALSE_;
        zero = 0.;
        one = 1.;
        two = 2.;

/*        LBETA,  LT,  LRND,  LEPS,  LEMIN and  LRMIN  are the local values of */
/*        BETA,  T,  RND,  EPS,  EMIN and  RMIN. */

/*        Throughout this routine  we use the function  DLAMC3  to ensure */
/*        that relevant values are stored  and not held in registers,  or */
/*        are not affected by optimizers. */

/*        DLAMC1 returns the parameters  LBETA,  LT,  LRND  and  LIEEE1. */

        /*< CALL DLAMC1( LBETA, LT, LRND, LIEEE1 ) >*/
        dlamc1_(&lbeta, &lt, &lrnd, &lieee1);

/*        Start to find EPS: first guess is B**( -LT ). */

        b = (doublereal) lbeta;
        i__1 = -lt;
        a = pow_di(&b, &i__1);
        leps = a;

/*        Try some tricks to see whether or not this is the correct  EPS. */

        b = two / 3;
        half = one / 2;
        d__1 = -half;
        sixth = dlamc3_(&b, &d__1);
        third = dlamc3_(&sixth, &sixth);
        d__1 = -half;
        b = dlamc3_(&third, &d__1);
        b = dlamc3_(&b, &sixth);
        /*< B = ABS( B ) >*/
        /* Written out so the result does not depend on whether `abs` is a */
        /* type-generic macro or the integer library function. */
        b = (b >= 0.) ? b : -b;
        if (b < leps) {
            b = leps;
        }

        leps = 1.;

/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
        while (leps > b && b > zero) {
            leps = b;
            /*< C = DLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) >*/
            d__1 = half * leps;
/* Computing 5th power */
            d__3 = two, d__4 = d__3, d__3 *= d__3;
/* Computing 2nd power */
            d__5 = leps;
            d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
            c__ = dlamc3_(&d__1, &d__2);
            /*< C = DLAMC3( HALF, -C ) >*/
            d__1 = -c__;
            c__ = dlamc3_(&half, &d__1);
            /*< B = DLAMC3( HALF, C ) >*/
            b = dlamc3_(&half, &c__);
            /*< C = DLAMC3( HALF, -B ) >*/
            d__1 = -b;
            c__ = dlamc3_(&half, &d__1);
            /*< B = DLAMC3( HALF, C ) >*/
            b = dlamc3_(&half, &c__);
        }
/* +       END WHILE */

        if (a < leps) {
            leps = a;
        }

/*        Computation of EPS complete. */

/*        Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)). */
/*        Keep dividing  A by BETA until (gradual) underflow occurs. This */
/*        is detected when we cannot recover the previous A. */

        rbase = one / lbeta;
        small = one;
        for (i__ = 1; i__ <= 3; ++i__) {
            d__1 = small * rbase;
            small = dlamc3_(&d__1, &zero);
        }
        a = dlamc3_(&one, &small);

        dlamc4_(&ngpmin, &one, &lbeta);
        d__1 = -one;
        dlamc4_(&ngnmin, &d__1, &lbeta);
        dlamc4_(&gpmin, &a, &lbeta);
        d__1 = -a;
        dlamc4_(&gnmin, &d__1, &lbeta);
        ieee = FALSE_;

        if (ngpmin == ngnmin && gpmin == gnmin) {
            if (ngpmin == gpmin) {
/*            ( Non twos-complement machines, no gradual underflow; */
/*              e.g.,  VAX ) */
                lemin = ngpmin;
            } else if (gpmin - ngpmin == 3) {
/*            ( Non twos-complement machines, with gradual underflow; */
/*              e.g., IEEE standard followers ) */
                lemin = ngpmin - 1 + lt;
                ieee = TRUE_;
            } else {
/*            ( A guess; no known machine ) */
                lemin = min(ngpmin,gpmin);
                iwarn = TRUE_;
            }
        } else if (ngpmin == gpmin && ngnmin == gnmin) {
            if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
/*            ( Twos-complement machines, no gradual underflow; */
/*              e.g., CYBER 205 ) */
                lemin = max(ngpmin,ngnmin);
            } else {
/*            ( A guess; no known machine ) */
                lemin = min(ngpmin,ngnmin);
                iwarn = TRUE_;
            }
        } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
                 {
            if (gpmin - min(ngpmin,ngnmin) == 3) {
/*            ( Twos-complement machines with gradual underflow; */
/*              no known machine ) */
                lemin = max(ngpmin,ngnmin) - 1 + lt;
            } else {
/*            ( A guess; no known machine ) */
                lemin = min(ngpmin,ngnmin);
                iwarn = TRUE_;
            }
        } else {
            /*< LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) >*/
/*         ( A guess; no known machine ) */
            i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
            lemin = min(i__1,gnmin);
            iwarn = TRUE_;
        }

/* ** */
/* Comment out this if block if EMIN is ok */
        if (iwarn) {
            first = TRUE_;
            printf("\n\n WARNING. The value EMIN may be incorrect: - ");
            /* Cast so the argument matches %ld whatever `integer` is. */
            printf("EMIN = %8ld\n", (long) lemin);
            printf("If, after inspection, the value EMIN looks acceptable ");
            printf("please comment out\n the IF block as marked within the ");
            printf("code of routine DLAMC2,\n otherwise supply EMIN ");
            printf("explicitly.\n");
        }
/* ** */

/*        Assume IEEE arithmetic if we found denormalised  numbers above, */
/*        or if arithmetic seems to round in the  IEEE style,  determined */
/*        in routine DLAMC1. A true IEEE machine should have both  things */
/*        true; however, faulty machines may have one or the other. */

        ieee = ieee || lieee1;

/*        Compute  RMIN by successive division by  BETA. We could compute */
/*        RMIN as BASE**( EMIN - 1 ),  but some machines underflow during */
/*        this computation. */

        lrmin = 1.;
        i__1 = 1 - lemin;
        for (i__ = 1; i__ <= i__1; ++i__) {
            d__1 = lrmin * rbase;
            lrmin = dlamc3_(&d__1, &zero);
        }

/*        Finally, call DLAMC5 to compute EMAX and RMAX. */

        /*< CALL DLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) >*/
        dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *eps = leps;
    *emin = lemin;
    *rmin = lrmin;
    *emax = lemax;
    *rmax = lrmax;

    return 0;

/*     End of DLAMC2 */

} /* dlamc2_ */
/* Subroutine */ int dlamc1_(int *beta, int *t, int *rnd, int *ieee1)
{
/*  -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       October 31, 1992

    Purpose
    =======

    DLAMC1 determines the machine parameters given by BETA, T, RND, and
    IEEE1. The work is done on the first call only; every call copies
    the remembered values to the output arguments and returns 0.

    Arguments
    =========

    BETA    (output) INT
            The base of the machine.

    T       (output) INT
            The number of ( BETA ) digits in the mantissa.

    RND     (output) INT
            Specifies whether proper rounding  ( RND = .TRUE. )  or
            chopping  ( RND = .FALSE. )  occurs in addition. This may not
            be a reliable guide to the way in which the machine performs
            its arithmetic.

    IEEE1   (output) INT
            Specifies whether rounding appears to be done in the IEEE
            'round to nearest' style.

    Further Details
    ===============

    The routine is based on the routine  ENVRON  by Malcolm and
    incorporates suggestions by Gentleman and Marovich. See

       Malcolm M. A. (1972) Algorithms to reveal properties of
          floating-point arithmetic. Comms. of the ACM, 15, 949-951.

       Gentleman W. M. and Marovich S. B. (1974) More on algorithms
          that reveal properties of floating point arithmetic units.
          Comms. of the ACM, 17, 276-277.

   =====================================================================
*/
    /* Initialized data */
    static int first = TRUE_;

    /* Remembered results */
    static int lbeta, lt, lrnd, lieee1;

    /* Working storage */
    static double one, qtr;
    static double hi, inc, sum, off;
    static double hi_next;
    static double r_even, r_odd;
    double tmp_a, tmp_b;

    extern double dlamc3_(double *, double *);

    if (first) {
        first = FALSE_;
        one = 1.;

        /* LBETA, LIEEE1, LT and LRND are the local values of BETA,
           IEEE1, T and RND.

           DLAMC3 is used for every sum so that relevant values are
           stored and not held in registers, or are not affected by
           optimizers.

           Step 1: hi = 2.0**m with the smallest positive integer m such
           that fl( hi + 1.0 ) = hi. */
        hi = 1.;
        sum = 1.;
        while (sum == one) {
            hi *= 2;
            sum = dlamc3_(&hi, &one);
            tmp_a = -hi;
            sum = dlamc3_(&sum, &tmp_a);
        }

        /* Step 2: inc = 2.0**m with the smallest positive integer m such
           that fl( hi + inc ) .gt. hi. */
        inc = 1.;
        sum = dlamc3_(&hi, &inc);
        while (sum == hi) {
            inc *= 2;
            sum = dlamc3_(&hi, &inc);
        }

        /* Step 3: the base. hi and sum are neighbouring floating point
           numbers in ( beta**t, beta**( t + 1 ) ), so they differ by
           beta; 0.25 is added so the cast truncates to beta and not
           ( beta - 1 ). */
        qtr = one / 4;
        hi_next = sum;
        tmp_a = -hi;
        sum = dlamc3_(&sum, &tmp_a);
        lbeta = (int) (sum + qtr);

        /* Step 4: rounding versus chopping, probed by adding slightly
           less than beta/2 and slightly more than beta/2 to hi. */
        inc = (double) lbeta;
        tmp_a = inc / 2;
        tmp_b = -inc / 100;
        off = dlamc3_(&tmp_a, &tmp_b);
        sum = dlamc3_(&off, &hi);
        if (sum == hi) {
            lrnd = TRUE_;
        } else {
            lrnd = FALSE_;
        }
        tmp_a = inc / 2;
        tmp_b = inc / 100;
        off = dlamc3_(&tmp_a, &tmp_b);
        sum = dlamc3_(&off, &hi);
        if (sum == hi && lrnd) {
            lrnd = FALSE_;
        }

        /* Step 5: IEEE 'round to nearest' check. inc/2 is half a unit in
           the last place of hi and hi_next; hi is even (last bit zero)
           and hi_next is odd, so adding inc/2 should not change hi but
           should change hi_next. */
        tmp_a = inc / 2;
        r_even = dlamc3_(&tmp_a, &hi);
        tmp_a = inc / 2;
        r_odd = dlamc3_(&tmp_a, &hi_next);
        lieee1 = r_even == hi && r_odd > hi_next && lrnd;

        /* Step 6: the mantissa length, found by powering rather than by
           a logarithm: the smallest positive integer t for which
           fl( beta**t + 1.0 ) = 1.0. */
        lt = 0;
        hi = 1.;
        sum = 1.;
        while (sum == one) {
            ++lt;
            hi *= lbeta;
            sum = dlamc3_(&hi, &one);
            tmp_a = -hi;
            sum = dlamc3_(&sum, &tmp_a);
        }
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *ieee1 = lieee1;
    return 0;

/*     End of DLAMC1 */

} /* dlamc1_ */
/*< SUBROUTINE DLAMC4( EMIN, START, BASE ) >*/
/* Subroutine */ int dlamc4_(integer *emin, doublereal *start, integer *base)
{
    extern doublereal dlamc3_(doublereal *, doublereal *);

    /* Current value and the two ways of stepping it down by BASE */
    doublereal cur;
    doublereal by_div, by_mul;          /* cur / BASE  and  cur * (1/BASE) */
    doublereal back_div, back_mul;      /* scaled back up by BASE */
    doublereal sum_div, sum_mul;        /* rebuilt by BASE repeated adds */
    doublereal unity, nil, inv_base, arg;
    integer k, reps;

/*  -- LAPACK auxiliary routine (version 1.1) -- */
/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
/*     Courant Institute, Argonne National Lab, and Rice University */
/*     October 31, 1992 */

/*  Purpose */
/*  ======= */

/*  DLAMC4 is a service routine for DLAMC2. */

/*  Arguments */
/*  ========= */

/*  EMIN    (output) INTEGER */
/*          The minimum exponent before (gradual) underflow, computed by */
/*          setting A = START and dividing by BASE until the previous A */
/*          can not be recovered. */

/*  START   (input) DOUBLE PRECISION */
/*          The starting point for determining EMIN. */

/*  BASE    (input) INTEGER */
/*          The base of the machine. */

/*  Returns 0 in all cases. */

/* ===================================================================== */

    cur = *start;
    unity = 1.;
    inv_base = unity / *base;
    nil = 0.;
    *emin = 1;

    /* First step down; adding zero through DLAMC3 yields the value used */
    /* as the next A. */
    arg = cur * inv_base;
    by_div = dlamc3_(&arg, &nil);

    /* Seed all four recovery checks with A so the loop runs at least once. */
    back_div = cur;
    back_mul = cur;
    sum_div = cur;
    sum_mul = cur;

/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */
/*    $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
    while (back_div == cur && back_mul == cur
           && sum_div == cur && sum_mul == cur) {
        --(*emin);
        cur = by_div;

        /* Step down by true division, then try to recover A both by */
        /* multiplication and by BASE repeated additions. */
        arg = cur / *base;
        by_div = dlamc3_(&arg, &nil);
        arg = by_div * *base;
        back_div = dlamc3_(&arg, &nil);
        sum_div = nil;
        reps = *base;
        for (k = 1; k <= reps; ++k) {
            sum_div += by_div;
        }

        /* Same again, stepping down by multiplication with 1/BASE and */
        /* recovering by division with 1/BASE. */
        arg = cur * inv_base;
        by_mul = dlamc3_(&arg, &nil);
        arg = by_mul / inv_base;
        back_mul = dlamc3_(&arg, &nil);
        sum_mul = nil;
        reps = *base;
        for (k = 1; k <= reps; ++k) {
            sum_mul += by_mul;
        }
    }
/* +    END WHILE */

    return 0;

/*     End of DLAMC4 */

} /* dlamc4_ */
/* Subroutine */ int dlamc2_(int *beta, int *t, int *rnd, double *eps,
        int *emin, double *rmin, int *emax, double *rmax)
{
/*  -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       October 31, 1992

    Purpose
    =======

    DLAMC2 determines the machine parameters specified in its argument
    list.

    The parameters are computed on the first call and kept in static
    storage; later calls only copy the stored values to the outputs.
    If the EMIN warning fires, FIRST is set again so that the
    computation (and the warning) is repeated on the next call.

    Arguments
    =========

    BETA    (output) INT
            The base of the machine.

    T       (output) INT
            The number of ( BETA ) digits in the mantissa.

    RND     (output) INT
            Specifies whether proper rounding  ( RND = .TRUE. )  or
            chopping  ( RND = .FALSE. )  occurs in addition. This may not
            be a reliable guide to the way in which the machine performs
            its arithmetic.

    EPS     (output) DOUBLE PRECISION
            The smallest positive number such that
               fl( 1.0 - EPS ) .LT. 1.0,
            where fl denotes the computed value.

    EMIN    (output) INT
            The minimum exponent before (gradual) underflow occurs.

    RMIN    (output) DOUBLE PRECISION
            The smallest normalized number for the machine, given by
            BASE**( EMIN - 1 ), where  BASE  is the floating point value
            of BETA.

    EMAX    (output) INT
            The maximum exponent before overflow occurs.

    RMAX    (output) DOUBLE PRECISION
            The largest positive number for the machine, given by
            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point
            value of BETA.

    Returns 0 in all cases.

    Further Details
    ===============

    The computation of  EPS  is based on a routine PARANOIA by
    W. Kahan of the University of California at Berkeley.

   =====================================================================
*/
    /* Initialized data */
    static int first = TRUE_;
    static int iwarn = FALSE_;

    /* System generated locals */
    int i__1;
    double d__1, d__2, d__3, d__4, d__5;

    /* Builtin functions */
    double pow_di(double *, int *);

    /* Local variables */
    static int ieee;
    static double half;
    static int lrnd;
    static double leps, zero, a, b, c;
    static int i, lbeta;
    static double rbase;
    static int lemin, lemax, gnmin;
    static double small;
    static int gpmin;
    static double third, lrmin, lrmax, sixth;
    extern /* Subroutine */ int dlamc1_(int *, int *, int *, int *);
    extern double dlamc3_(double *, double *);
    static int lieee1;
    extern /* Subroutine */ int dlamc4_(int *, double *, int *),
            dlamc5_(int *, int *, int *, int *, int *, double *);
    static int lt, ngnmin, ngpmin;
    static double one, two;

    if (first) {
        first = FALSE_;
        zero = 0.;
        one = 1.;
        two = 2.;

/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of
          BETA, T, RND, EPS, EMIN and RMIN.

          Throughout this routine we use the function DLAMC3 to ensure
          that relevant values are stored and not held in registers, or
          are not affected by optimizers.

          DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */

        dlamc1_(&lbeta, &lt, &lrnd, &lieee1);

/*        Start to find EPS: first guess is BETA**(-T). */

        b = (double) lbeta;
        i__1 = -lt;
        a = pow_di(&b, &i__1);
        leps = a;

/*        Try some tricks to see whether or not this is the correct EPS. */

        b = two / 3;
        half = one / 2;
        d__1 = -half;
        sixth = dlamc3_(&b, &d__1);
        third = dlamc3_(&sixth, &sixth);
        d__1 = -half;
        b = dlamc3_(&third, &d__1);
        b = dlamc3_(&b, &sixth);
/*        Floating-point absolute value, written out so the result does
          not depend on whether `abs` is a type-generic macro or the
          integer library function. */
        b = (b >= 0.) ? b : -b;
        if (b < leps) {
            b = leps;
        }

        leps = 1.;

/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
        while (leps > b && b > zero) {
            leps = b;
            d__1 = half * leps;
/* Computing 5th power */
            d__3 = two, d__4 = d__3, d__3 *= d__3;
/* Computing 2nd power */
            d__5 = leps;
            d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
            c = dlamc3_(&d__1, &d__2);
            d__1 = -c;
            c = dlamc3_(&half, &d__1);
            b = dlamc3_(&half, &c);
            d__1 = -b;
            c = dlamc3_(&half, &d__1);
            b = dlamc3_(&half, &c);
        }
/* +       END WHILE */

        if (a < leps) {
            leps = a;
        }

/*        Computation of EPS complete.

          Now find EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
          Keep dividing A by BETA until (gradual) underflow occurs. This
          is detected when we cannot recover the previous A. */

        rbase = one / lbeta;
        small = one;
        for (i = 1; i <= 3; ++i) {
            d__1 = small * rbase;
            small = dlamc3_(&d__1, &zero);
/* L20: */
        }
        a = dlamc3_(&one, &small);

        dlamc4_(&ngpmin, &one, &lbeta);
        d__1 = -one;
        dlamc4_(&ngnmin, &d__1, &lbeta);
        dlamc4_(&gpmin, &a, &lbeta);
        d__1 = -a;
        dlamc4_(&gnmin, &d__1, &lbeta);
        ieee = FALSE_;

        if (ngpmin == ngnmin && gpmin == gnmin) {
            if (ngpmin == gpmin) {
                lemin = ngpmin;
/*            ( Non twos-complement machines, no gradual underflow;
                e.g., VAX ) */
            } else if (gpmin - ngpmin == 3) {
                lemin = ngpmin - 1 + lt;
                ieee = TRUE_;
/*            ( Non twos-complement machines, with gradual underflow;
                e.g., IEEE standard followers ) */
            } else {
                lemin = min(ngpmin,gpmin);
/*            ( A guess; no known machine ) */
                iwarn = TRUE_;
            }

        } else if (ngpmin == gpmin && ngnmin == gnmin) {
            if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
                lemin = max(ngpmin,ngnmin);
/*            ( Twos-complement machines, no gradual underflow;
                e.g., CYBER 205 ) */
            } else {
                lemin = min(ngpmin,ngnmin);
/*            ( A guess; no known machine ) */
                iwarn = TRUE_;
            }

        } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) {
            if (gpmin - min(ngpmin,ngnmin) == 3) {
                lemin = max(ngpmin,ngnmin) - 1 + lt;
/*            ( Twos-complement machines with gradual underflow;
                no known machine ) */
            } else {
                lemin = min(ngpmin,ngnmin);
/*            ( A guess; no known machine ) */
                iwarn = TRUE_;
            }

        } else {
/* Computing MIN */
            i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
            lemin = min(i__1,gnmin);
/*         ( A guess; no known machine ) */
            iwarn = TRUE_;
        }
/* **
   Comment out this if block if EMIN is ok.
   Setting FIRST again makes the next call recompute everything. */
        if (iwarn) {
            first = TRUE_;
            printf("\n\n WARNING. The value EMIN may be incorrect:- ");
            printf("EMIN = %8i\n",lemin);
            printf("If, after inspection, the value EMIN looks acceptable ");
            printf("please comment out \n the IF block as marked within the ");
            printf("code of routine DLAMC2, \n otherwise supply EMIN ");
            printf("explicitly.\n");
        }
/* **

          Assume IEEE arithmetic if we found denormalised numbers above,
          or if arithmetic seems to round in the IEEE style, determined
          in routine DLAMC1. A true IEEE machine should have both things
          true; however, faulty machines may have one or the other. */

        ieee = ieee || lieee1;

/*        Compute RMIN by successive division by BETA. We could compute
          RMIN as BASE**( EMIN - 1 ), but some machines underflow during
          this computation. */

        lrmin = 1.;
        i__1 = 1 - lemin;
        for (i = 1; i <= i__1; ++i) {
            d__1 = lrmin * rbase;
            lrmin = dlamc3_(&d__1, &zero);
/* L30: */
        }

/*        Finally, call DLAMC5 to compute EMAX and RMAX. */

        dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *eps = leps;
    *emin = lemin;
    *rmin = lrmin;
    *emax = lemax;
    *rmax = lrmax;

    return 0;

/*     End of DLAMC2 */

} /* dlamc2_ */
/* Subroutine */ int dlamc2_(integer *beta, integer *t, logical *rnd,
	doublereal *eps, integer *emin, doublereal *rmin, integer *emax,
	doublereal *rmax)
{
/*  -- LAPACK auxiliary routine (version 3.1) --
       Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd..
       November 2006

    Purpose
    =======

    DLAMC2 determines the machine parameters specified in its argument
    list.  The parameters are computed while FIRST is true and kept in
    static storage; every call then copies the stored values to the
    output arguments.  When the EMIN warning below is issued FIRST is set
    again, so the next call repeats the computation.

    Arguments
    =========

    BETA    (output) INTEGER
            The base of the machine.

    T       (output) INTEGER
            The number of ( BETA ) digits in the mantissa.

    RND     (output) LOGICAL
            Specifies whether proper rounding ( RND = .TRUE. ) or
            chopping ( RND = .FALSE. ) occurs in addition.  This may not
            be a reliable guide to the way in which the machine performs
            its arithmetic.

    EPS     (output) DOUBLE PRECISION
            The smallest positive number such that
               fl( 1.0 - EPS ) .LT. 1.0,
            where fl denotes the computed value.

    EMIN    (output) INTEGER
            The minimum exponent before (gradual) underflow occurs.

    RMIN    (output) DOUBLE PRECISION
            The smallest normalized number for the machine, given by
            BASE**( EMIN - 1 ), where BASE is the floating point value
            of BETA.

    EMAX    (output) INTEGER
            The maximum exponent before overflow occurs.

    RMAX    (output) DOUBLE PRECISION
            The largest positive number for the machine, given by
            BASE**EMAX * ( 1 - EPS ), where BASE is the floating point
            value of BETA.

    Further Details
    ===============

    The computation of EPS is based on a routine PARANOIA by
    W. Kahan of the University of California at Berkeley.

    ===================================================================== */

    /* Initialized data */
    static logical first = TRUE_;
    static logical iwarn = FALSE_;

    /* System generated locals */
    integer i__1;
    doublereal d__1, d__2, d__3, d__4, d__5;

    /* Builtin functions */
    double pow_di(doublereal *, integer *);

    /* External routines */
    extern /* Subroutine */ int dlamc1_(integer *, integer *, logical *,
	    logical *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlamc4_(integer *, doublereal *, integer *),
	    dlamc5_(integer *, integer *, integer *, logical *, integer *,
	    doublereal *);

    /* Values that are copied to the outputs on every call.  They must
       all be static; LRND in particular is read even when the
       computation below is skipped. */
    static integer lbeta, lt, lemin, lemax;
    static logical lrnd;
    static doublereal leps, lrmin, lrmax;

    /* Scratch values used only while computing */
    doublereal a, b, c__;
    doublereal zero, one, two, half, third, sixth, rbase, small;
    integer i__, gnmin, gpmin, ngnmin, ngpmin;
    logical ieee, lieee1;

    if (first) {
	zero = 0.;
	one = 1.;
	two = 2.;

	/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of
	   BETA, T, RND, EPS, EMIN and RMIN.

	   Throughout this routine we use the function DLAMC3 to ensure
	   that relevant values are stored and not held in registers, or
	   are not affected by optimizers.

	   DLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1.
	   The address of LT is written as &(lt) so that text filters
	   cannot mistake the argument for an HTML entity. */
	dlamc1_(&lbeta, &(lt), &lrnd, &lieee1);

	/* Start to find EPS. */
	b = (doublereal) lbeta;
	i__1 = -lt;
	a = pow_di(&b, &i__1);
	leps = a;

	/* Try some tricks to see whether or not this is the correct EPS. */
	b = two / 3;
	half = one / 2;
	d__1 = -half;
	sixth = dlamc3_(&b, &d__1);
	third = dlamc3_(&sixth, &sixth);
	d__1 = -half;
	b = dlamc3_(&third, &d__1);
	b = dlamc3_(&b, &sixth);
	b = abs(b);
	if (b < leps) {
	    b = leps;
	}

	leps = 1.;

	/* WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
	while (leps > b && b > zero) {
	    leps = b;
	    d__1 = half * leps;
	    /* Computing 5th power of TWO */
	    d__3 = two, d__4 = d__3, d__3 *= d__3;
	    /* Computing 2nd power of LEPS */
	    d__5 = leps;
	    d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
	    c__ = dlamc3_(&d__1, &d__2);
	    d__1 = -c__;
	    c__ = dlamc3_(&half, &d__1);
	    b = dlamc3_(&half, &c__);
	    d__1 = -b;
	    c__ = dlamc3_(&half, &d__1);
	    b = dlamc3_(&half, &c__);
	}

	if (a < leps) {
	    leps = a;
	}

	/* Computation of EPS complete.

	   Now find EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3)).
	   Keep dividing A by BETA until (gradual) underflow occurs.  This
	   is detected when we cannot recover the previous A. */
	rbase = one / lbeta;
	small = one;
	for (i__ = 1; i__ <= 3; ++i__) {
	    d__1 = small * rbase;
	    small = dlamc3_(&d__1, &zero);
	}
	a = dlamc3_(&one, &small);
	dlamc4_(&ngpmin, &one, &lbeta);
	d__1 = -one;
	dlamc4_(&ngnmin, &d__1, &lbeta);
	dlamc4_(&gpmin, &a, &lbeta);
	d__1 = -a;
	dlamc4_(&gnmin, &d__1, &lbeta);
	ieee = FALSE_;

	if (ngpmin == ngnmin && gpmin == gnmin) {
	    if (ngpmin == gpmin) {
		/* ( Non twos-complement machines, no gradual underflow;
		     e.g., VAX ) */
		lemin = ngpmin;
	    } else if (gpmin - ngpmin == 3) {
		/* ( Non twos-complement machines, with gradual underflow;
		     e.g., IEEE standard followers ) */
		lemin = ngpmin - 1 + lt;
		ieee = TRUE_;
	    } else {
		/* ( A guess; no known machine ) */
		lemin = min(ngpmin,gpmin);
		iwarn = TRUE_;
	    }
	} else if (ngpmin == gpmin && ngnmin == gnmin) {
	    if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
		/* ( Twos-complement machines, no gradual underflow;
		     e.g., CYBER 205 ) */
		lemin = max(ngpmin,ngnmin);
	    } else {
		/* ( A guess; no known machine ) */
		lemin = min(ngpmin,ngnmin);
		iwarn = TRUE_;
	    }
	} else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
		 {
	    if (gpmin - min(ngpmin,ngnmin) == 3) {
		/* ( Twos-complement machines with gradual underflow;
		     no known machine ) */
		lemin = max(ngpmin,ngnmin) - 1 + lt;
	    } else {
		/* ( A guess; no known machine ) */
		lemin = min(ngpmin,ngnmin);
		iwarn = TRUE_;
	    }
	} else {
	    /* ( A guess; no known machine ) */
	    /* Computing MIN */
	    i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
	    lemin = min(i__1,gnmin);
	    iwarn = TRUE_;
	}
	first = FALSE_;

	/* **
	   Comment out this if block if EMIN is ok */
	if (iwarn) {
	    first = TRUE_;
	    /* LEMIN is converted to long so that the conversion
	       specification is correct whatever integer type the
	       translation uses. */
	    printf("\n\n WARNING. The value EMIN may be incorrect:- ");
	    printf("EMIN = %8ld\n", (long) lemin);
	    printf(" If, after inspection, the value EMIN looks acceptable");
	    printf(" please comment out \n the IF block as marked within the");
	    printf(" code of routine DLAMC2,\n otherwise supply EMIN");
	    printf(" explicitly.\n");
	}
	/* ** */

	/* Assume IEEE arithmetic if we found denormalised numbers above,
	   or if arithmetic seems to round in the IEEE style, determined
	   in routine DLAMC1.  A true IEEE machine should have both things
	   true; however, faulty machines may have one or the other. */
	ieee = ieee || lieee1;

	/* Compute RMIN by successive division by BETA.  We could compute
	   RMIN as BASE**( EMIN - 1 ), but some machines underflow during
	   this computation. */
	lrmin = 1.;
	i__1 = 1 - lemin;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    d__1 = lrmin * rbase;
	    lrmin = dlamc3_(&d__1, &zero);
	}

	/* Finally, call DLAMC5 to compute EMAX and RMAX. */
	dlamc5_(&lbeta, &(lt), &lemin, &ieee, &lemax, &lrmax);
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *eps = leps;
    *emin = lemin;
    *rmin = lrmin;
    *emax = lemax;
    *rmax = lrmax;

    return 0;

/*     End of DLAMC2 */

} /* dlamc2_ */
/* Subroutine */ int dlamc4_(int *emin, double *start, int *base)
{
/*  -- LAPACK auxiliary routine (version 2.0) --

    DLAMC4 is a service routine for DLAMC2.

    EMIN    (output) INT
            The minimum exponent before (gradual) underflow, computed by
            setting A = START and dividing by BASE until the previous A
            can not be recovered.

    START   (input) DOUBLE PRECISION
            The starting point for determining EMIN.

    BASE    (input) INT
            The base of the machine.

    The return value is always 0. */

    extern double dlamc3_(double *, double *);

    /* The floating-point work values keep the static storage class they
       have in the original translation. */
    static double cur;			/* value currently being divided */
    static double step_div, back_div, sum_div;	/* via division by BASE */
    static double step_rcp, back_rcp, sum_rcp;	/* via multiplication by 1/BASE */
    static double unit, nil, inv_base;
    double arg;
    int n;

    cur = *start;
    unit = 1.;
    inv_base = unit / *base;
    nil = 0.;
    *emin = 1;
    arg = cur * inv_base;
    step_div = dlamc3_(&arg, &nil);
    back_div = cur;
    back_rcp = cur;
    sum_div = cur;
    sum_rcp = cur;

    /* Keep stepping down while all four reconstructions reproduce the
       current value exactly. */
    while (back_div == cur && back_rcp == cur && sum_div == cur
	    && sum_rcp == cur) {
	--(*emin);
	cur = step_div;

	/* Divide by BASE, then rebuild by one multiplication and by
	   repeated addition. */
	arg = cur / *base;
	step_div = dlamc3_(&arg, &nil);
	arg = step_div * *base;
	back_div = dlamc3_(&arg, &nil);
	sum_div = nil;
	for (n = 0; n < *base; ++n) {
	    sum_div += step_div;
	}

	/* Same again, using the reciprocal of BASE. */
	arg = cur * inv_base;
	step_rcp = dlamc3_(&arg, &nil);
	arg = step_rcp / inv_base;
	back_rcp = dlamc3_(&arg, &nil);
	sum_rcp = nil;
	for (n = 0; n < *base; ++n) {
	    sum_rcp += step_rcp;
	}
    }

    return 0;

/*     End of DLAMC4 */

} /* dlamc4_ */
/* Subroutine */ int dlamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, doublereal *rmax) { /* System generated locals */ integer i__1; doublereal d__1; /* Local variables */ integer i__; doublereal y, z__; integer try__, lexp; doublereal oldy; integer uexp, nbits; extern doublereal dlamc3_(doublereal *, doublereal *); doublereal recbas; integer exbits, expsum; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* DLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. */ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) DOUBLE PRECISION */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* abs(EMIN). 
We then assume that EMAX + abs(EMIN) will sum */ /* approximately to the bound that is closest to abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. */ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ recbas = 1. / *beta; z__ = *beta - 1.; y = 0.; i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { z__ *= recbas; if (y < 1.) { oldy = y; } y = dlamc3_(&y, &z__); /* L20: */ } if (y >= 1.) 
{ y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { d__1 = y * *beta; y = dlamc3_(&d__1, &c_b32); /* L30: */ } *rmax = y; return 0; /* End of DLAMC5 */ } /* dlamc5_ */
/* Subroutine */ int dlamc5_(int *beta, int *p, int *emin, int *ieee,
	int *emax, double *rmax)
{
/*  -- LAPACK auxiliary routine (version 2.0) --
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
       Courant Institute, Argonne National Lab, and Rice University
       October 31, 1992

    Purpose
    =======

    DLAMC5 attempts to compute RMAX, the largest machine floating-point
    number, without overflow.  It assumes that EMAX + abs(EMIN) sum
    approximately to a power of 2.  It will fail on machines where this
    assumption does not hold, for example, the Cyber 205 (EMIN = -28625,
    EMAX = 28718).  It will also fail if the value supplied for EMIN is
    too large (i.e. too close to zero), probably with overflow.

    Arguments
    =========

    BETA    (input) INT
            The base of floating-point arithmetic.

    P       (input) INT
            The number of base BETA digits in the mantissa of a
            floating-point value.

    EMIN    (input) INT
            The minimum exponent before (gradual) underflow.

    IEEE    (input) INT
            A flag specifying whether or not the arithmetic system is
            thought to comply with the IEEE standard.

    EMAX    (output) INT
            The largest exponent before overflow

    RMAX    (output) DOUBLE PRECISION
            The largest machine floating-point number.

    ===================================================================== */

    /* Table of constant values */
    static double c_b5 = 0.;

    extern double dlamc3_(double *, double *);

    /* Work values keep the static storage class of the original
       translation. */
    static int low_pow, high_pow, cand, exp_bits, exp_range, word_bits;
    static int k;
    static double acc, term, last_acc, inv_base;
    double prod;

    /* First compute LOW_POW and HIGH_POW, two powers of 2 that bound
       abs(EMIN).  EXP_BITS is the number of bits needed to store the
       exponent. */
    low_pow = 1;
    exp_bits = 1;
    while ((cand = low_pow << 1) <= -(*emin)) {
	low_pow = cand;
	++exp_bits;
    }
    if (low_pow != -(*emin)) {
	high_pow = cand;
	++exp_bits;
    } else {
	high_pow = low_pow;
    }

    /* -LOW_POW is now less than or equal to EMIN, and -HIGH_POW is
       greater than or equal to EMIN.  EMAX + abs(EMIN) is assumed to sum
       approximately to the bound that is closest to abs(EMIN);
       EXP_RANGE is the exponent range, approximately EMAX - EMIN + 1. */
    if (high_pow + *emin > -low_pow - *emin) {
	exp_range = low_pow << 1;
    } else {
	exp_range = high_pow << 1;
    }
    *emax = exp_range + *emin - 1;

    /* WORD_BITS is the total number of bits needed to store a
       floating-point number. */
    word_bits = exp_bits + 1 + *p;
    if (word_bits % 2 == 1 && *beta == 2) {
	/* Either there are an odd number of bits used to store a
	   floating-point number, which is unlikely, or some bits are not
	   used in the representation of numbers, which is possible
	   (e.g. Cray machines), or the mantissa has an implicit bit
	   (e.g. IEEE machines, Dec Vax machines), which is perhaps the
	   most likely.  The last alternative is assumed: EMAX is reduced
	   by one because there must be some way of representing zero in
	   an implicit-bit system.  On machines like Cray the reduction is
	   unnecessary. */
	--(*emax);
    }
    if (*ieee) {
	/* Assume an IEEE machine, which reserves one exponent for
	   infinity and NaN. */
	--(*emax);
    }

    /* Now create RMAX, the largest machine number, which should be equal
       to (1.0 - BETA**(-P)) * BETA**EMAX .

       First compute 1.0 - BETA**(-P), being careful that the result is
       less than 1.0 . */
    inv_base = 1. / *beta;
    term = *beta - 1.;
    acc = 0.;
    for (k = 1; k <= *p; ++k) {
	term *= inv_base;
	if (acc < 1.) {
	    last_acc = acc;
	}
	acc = dlamc3_(&acc, &term);
    }
    if (acc >= 1.) {
	acc = last_acc;
    }

    /* Now multiply by BETA**EMAX to get RMAX. */
    for (k = 1; k <= *emax; ++k) {
	prod = acc * *beta;
	acc = dlamc3_(&prod, &c_b5);
    }

    *rmax = acc;
    return 0;

/*     End of DLAMC5 */

} /* dlamc5_ */
/* Subroutine */ int dlaed3_(integer *k, integer *n, integer *n1,
	doublereal *d__, doublereal *q, integer *ldq, doublereal *rho,
	doublereal *dlamda, doublereal *q2, integer *indx, integer *ctot,
	doublereal *w, doublereal *s, integer *info)
{
/*  -- LAPACK routine (version 3.0) --
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
       Courant Institute, NAG Ltd., and Rice University
       June 30, 1999

    Purpose
    =======

    DLAED3 finds the roots of the secular equation, as defined by the
    values in D, W, and RHO, between 1 and K.  It makes the appropriate
    calls to DLAED4 and then updates the eigenvectors by multiplying the
    matrix of eigenvectors of the pair of eigensystems being combined by
    the matrix of eigenvectors of the K-by-K system which is solved here.

    This code makes very mild assumptions about floating point
    arithmetic.  It will work on machines with a guard digit in
    add/subtract, or on those binary machines without guard digits which
    subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.  It
    could conceivably fail on hexadecimal or decimal machines without
    guard digits, but we know of none.

    Arguments
    =========

    K       (input) INTEGER
            The number of terms in the rational function to be solved by
            DLAED4.  K >= 0.

    N       (input) INTEGER
            The number of rows and columns in the Q matrix.
            N >= K (deflation may result in N>K).

    N1      (input) INTEGER
            The location of the last eigenvalue in the leading submatrix.
            min(1,N) <= N1 <= N/2.

    D       (output) DOUBLE PRECISION array, dimension (N)
            D(I) contains the updated eigenvalues for 1 <= I <= K.

    Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)
            Initially the first K columns are used as workspace.
            On output the columns 1 to K contain the updated
            eigenvectors.

    LDQ     (input) INTEGER
            The leading dimension of the array Q.  LDQ >= max(1,N).

    RHO     (input) DOUBLE PRECISION
            The value of the parameter in the rank one update equation.
            RHO >= 0 required.

    DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K)
            The first K elements of this array contain the old roots of
            the deflated updating problem.  These are the poles of the
            secular equation.  May be changed on output by having the
            lowest order bit set to zero on Cray X-MP, Cray Y-MP, Cray-2,
            or Cray C-90, as described above.

    Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N)
            The first K columns of this matrix contain the non-deflated
            eigenvectors for the split problem.

    INDX    (input) INTEGER array, dimension (N)
            The permutation used to arrange the columns of the deflated
            Q matrix into three groups (see DLAED2).  The rows of the
            eigenvectors found by DLAED4 must be likewise permuted before
            the matrix multiply can take place.

    CTOT    (input) INTEGER array, dimension (4)
            A count of the total number of the various types of columns
            in Q, as described in INDX.  The fourth column type is any
            column which has been deflated.

    W       (input/output) DOUBLE PRECISION array, dimension (K)
            The first K elements of this array contain the components of
            the deflation-adjusted updating vector.  Destroyed on output.

    S       (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K
            Will contain the eigenvectors of the repaired matrix which
            will be multiplied by the previously accumulated eigenvectors
            to update the system.

    INFO    (output) INTEGER
            = 0:  successful exit.
            < 0:  if INFO = -i, the i-th argument had an illegal value.
            > 0:  if INFO = 1, an eigenvalue did not converge

    Further Details
    ===============

    Based on contributions by
       Jeff Rutter, Computer Science Division, University of California
       at Berkeley, USA
    Modified by Francoise Tisseur, University of Tennessee.

    ===================================================================== */

    /* Table of constant values */
    static integer c__1 = 1;
    static doublereal c_b22 = 1.;
    static doublereal c_b23 = 0.;

    /* Builtin functions */
    double sqrt(doublereal), d_sign(doublereal *, doublereal *);

    /* External routines */
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *,
	    integer *, doublereal *, doublereal *, integer *, doublereal *,
	    integer *, doublereal *, doublereal *, integer *), dcopy_(
	    integer *, doublereal *, integer *, doublereal *, integer *),
	    dlaed4_(integer *, integer *, doublereal *, doublereal *,
	    doublereal *, doublereal *, doublereal *, integer *);
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *,
	    doublereal *, integer *, doublereal *, integer *), dlaset_(char *,
	    integer *, integer *, doublereal *, doublereal *, doublereal *,
	    integer *), xerbla_(char *, integer *);

    /* Local variables (static, as in the original translation) */
    static doublereal vec_norm;
    static integer row, col, perm;
    static integer rows_low, cnt12, cnt23, q2_low;
    integer q_dim1, q_offset, bound, inner, arg_pos, diag_stride;
    doublereal root;

#define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1]

    /* Parameter adjustments: shift every array so that it can be indexed
       from 1. */
    --d__;
    q_dim1 = *ldq;
    q_offset = 1 + q_dim1 * 1;
    q -= q_offset;
    --dlamda;
    --q2;
    --indx;
    --ctot;
    --w;
    --s;

    /* Test the input parameters. */
    *info = 0;
    if (*k < 0) {
	*info = -1;
    } else if (*n < *k) {
	*info = -2;
    } else if (*ldq < max(1,*n)) {
	*info = -6;
    }
    if (*info != 0) {
	arg_pos = -(*info);
	xerbla_("DLAED3", &arg_pos);
	return 0;
    }

    /* Quick return if possible */
    if (*k == 0) {
	return 0;
    }

    /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
       be computed with high relative accuracy (barring over/underflow).
       This is a problem on machines without a guard digit in
       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
       The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
       which on any of these machines zeros out the bottommost bit of
       DLAMDA(I) if it is 1; this makes the subsequent subtractions
       DLAMDA(I)-DLAMDA(J) unproblematic when cancellation occurs.  On
       binary machines with a guard digit (almost all machines) it does
       not change DLAMDA(I) at all.  On hexadecimal and decimal machines
       with a guard digit, it slightly changes the bottommost bits of
       DLAMDA(I).  It does not account for hexadecimal or decimal
       machines without guard digits (we know of none).  A subroutine
       call is used to compute 2*DLAMDA(I) to prevent optimizing
       compilers from eliminating this code. */
    bound = *k;
    for (row = 1; row <= bound; ++row) {
	dlamda[row] = dlamc3_(&dlamda[row], &dlamda[row]) - dlamda[row];
    }

    /* Solve the secular equation for each root.  If the zero finder
       fails, the computation is terminated. */
    bound = *k;
    for (col = 1; col <= bound; ++col) {
	dlaed4_(k, &col, &dlamda[1], &w[1], &q_ref(1, col), rho, &d__[col],
		info);
	if (*info != 0) {
	    return 0;
	}
    }

    if (*k == 2) {
	/* Only the row permutation is applied to the two columns. */
	bound = *k;
	for (col = 1; col <= bound; ++col) {
	    w[1] = q_ref(1, col);
	    w[2] = q_ref(2, col);
	    perm = indx[1];
	    q_ref(1, col) = w[perm];
	    perm = indx[2];
	    q_ref(2, col) = w[perm];
	}
    } else if (*k != 1) {
	/* Compute updated W; the incoming W is saved in S. */
	dcopy_(k, &w[1], &c__1, &s[1], &c__1);

	/* Initialize W(I) = Q(I,I) */
	diag_stride = *ldq + 1;
	dcopy_(k, &q[q_offset], &diag_stride, &w[1], &c__1);
	bound = *k;
	for (col = 1; col <= bound; ++col) {
	    inner = col - 1;
	    for (row = 1; row <= inner; ++row) {
		w[row] *= q_ref(row, col) / (dlamda[row] - dlamda[col]);
	    }
	    inner = *k;
	    for (row = col + 1; row <= inner; ++row) {
		w[row] *= q_ref(row, col) / (dlamda[row] - dlamda[col]);
	    }
	}
	bound = *k;
	for (row = 1; row <= bound; ++row) {
	    root = sqrt(-w[row]);
	    w[row] = d_sign(&root, &s[row]);
	}

	/* Compute eigenvectors of the modified rank-1 modification. */
	bound = *k;
	for (col = 1; col <= bound; ++col) {
	    inner = *k;
	    for (row = 1; row <= inner; ++row) {
		s[row] = w[row] / q_ref(row, col);
	    }
	    vec_norm = dnrm2_(k, &s[1], &c__1);
	    inner = *k;
	    for (row = 1; row <= inner; ++row) {
		perm = indx[row];
		q_ref(row, col) = s[perm] / vec_norm;
	    }
	}
    }

    /* Compute the updated eigenvectors. */
    rows_low = *n - *n1;
    cnt12 = ctot[1] + ctot[2];
    cnt23 = ctot[2] + ctot[3];

    dlacpy_("A", &cnt23, k, &q_ref(ctot[1] + 1, 1), ldq, &s[1], &cnt23);
    q2_low = *n1 * cnt12 + 1;
    if (cnt23 != 0) {
	dgemm_("N", "N", &rows_low, k, &cnt23, &c_b22, &q2[q2_low],
		&rows_low, &s[1], &cnt23, &c_b23, &q_ref(*n1 + 1, 1), ldq);
    } else {
	dlaset_("A", &rows_low, k, &c_b23, &c_b23, &q_ref(*n1 + 1, 1), ldq);
    }

    dlacpy_("A", &cnt12, k, &q[q_offset], ldq, &s[1], &cnt12);
    if (cnt12 != 0) {
	dgemm_("N", "N", n1, k, &cnt12, &c_b22, &q2[1], n1, &s[1], &cnt12,
		&c_b23, &q[q_offset], ldq);
    } else {
	dlaset_("A", n1, k, &c_b23, &c_b23, &q_ref(1, 1), ldq);
    }

    return 0;

/*     End of DLAED3 */

} /* dlaed3_ */
/* Subroutine */ int dlasd8_(integer *icompq, integer *k, doublereal *d__, doublereal *z__, doublereal *vf, doublereal *vl, doublereal *difl, doublereal *difr, integer *lddifr, doublereal *dsigma, doublereal * work, integer *info)
{
    /* System generated locals */
    integer difr_dim1, difr_offset, i__1, i__2;
    doublereal d__1, d__2;

    /* Builtin functions */
    double sqrt(doublereal), d_sign(doublereal *, doublereal *);

    /* Local variables */
    extern doublereal ddot_(integer *, doublereal *, integer *, doublereal *, integer *);
    static doublereal temp;
    extern doublereal dnrm2_(integer *, doublereal *, integer *);
    static integer iwk2i, iwk3i, i__, j;
    static doublereal diflj, difrj, dsigj;
    extern /* Subroutine */ int dcopy_(integer *, doublereal *, integer *, doublereal *, integer *);
    extern doublereal dlamc3_(doublereal *, doublereal *);
    extern /* Subroutine */ int dlasd4_(integer *, integer *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, doublereal *, integer *);
    static doublereal dj;
    extern /* Subroutine */ int dlascl_(char *, integer *, integer *, doublereal *, doublereal *, integer *, integer *, doublereal *, integer *, integer *), dlaset_(char *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *), xerbla_(char *, integer *);
    static doublereal dsigjp, rho;
    static integer iwk1, iwk2, iwk3;

/* 1-based access to element (a_1, a_2) of DIFR; valid once DIFR has been
   shifted by difr_offset below. */
#define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1]

/*  -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) --
       Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab,
       Courant Institute, NAG Ltd., and Rice University
       June 30, 1999

    Purpose
    =======

    DLASD8 finds the square roots of the roots of the secular equation,
    as defined by the values in DSIGMA and Z.  It makes the appropriate
    calls to DLASD4, and stores, for each element in D, the distance to
    its two nearest poles (elements in DSIGMA).  It also updates the
    arrays VF and VL, the first and last components of all the right
    singular vectors of the original bidiagonal matrix.

    DLASD8 is called from DLASD6.

    The operation counts are accumulated in latime_1.ops.

    Arguments
    =========

    ICOMPQ  (input) INTEGER
            Specifies whether singular vectors are to be computed in
            factored form in the calling routine:
            = 0: Compute singular values only.
            = 1: Compute singular vectors in factored form as well.

    K       (input) INTEGER
            The number of terms in the rational function to be solved
            by DLASD4.  K >= 1.

    D       (output) DOUBLE PRECISION array, dimension ( K )
            On output, D contains the updated singular values.

    Z       (input) DOUBLE PRECISION array, dimension ( K )
            The first K elements of this array contain the components
            of the deflation-adjusted updating row vector.
            NOTE(review): the code below rescales and overwrites Z
            (DLASCL call and the "Compute updated Z" loop), so it is
            not a pure input.

    VF      (input/output) DOUBLE PRECISION array, dimension ( K )
            On entry, VF contains information passed through DBEDE8.
            On exit, VF contains the first K components of the first
            components of all right singular vectors of the bidiagonal
            matrix.

    VL      (input/output) DOUBLE PRECISION array, dimension ( K )
            On entry, VL contains information passed through DBEDE8.
            On exit, VL contains the first K components of the last
            components of all right singular vectors of the bidiagonal
            matrix.

    DIFL    (output) DOUBLE PRECISION array, dimension ( K )
            On exit, DIFL(I) = D(I) - DSIGMA(I).

    DIFR    (output) DOUBLE PRECISION array,
                     dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and
                     dimension ( K ) if ICOMPQ = 0.
            On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not
            defined and will not be referenced.

            If ICOMPQ = 1, DIFR(1:K,2) is an array containing the
            normalizing factors for the right singular vector matrix.

    LDDIFR  (input) INTEGER
            The leading dimension of DIFR, must be at least K.

    DSIGMA  (input) DOUBLE PRECISION array, dimension ( K )
            The first K elements of this array contain the old roots
            of the deflated updating problem.  These are the poles
            of the secular equation.
            NOTE(review): the code below rewrites each DSIGMA(I) as
            2*DSIGMA(I)-DSIGMA(I), so the array is also written.

    WORK    (workspace) DOUBLE PRECISION array, dimension at least 3 * K

    INFO    (output) INTEGER
            = 0:  successful exit.
            < 0:  if INFO = -i, the i-th argument had an illegal value.
            > 0:  if INFO = 1, a singular value did not converge

    Further Details
    ===============

    Based on contributions by
       Ming Gu and Huan Ren, Computer Science Division, University of
       California at Berkeley, USA

    ===================================================================== */

    /* Parameter adjustments: shift every array so that it can be indexed
       from 1. */
    --d__;
    --z__;
    --vf;
    --vl;
    --difl;
    difr_dim1 = *lddifr;
    difr_offset = 1 + difr_dim1 * 1;
    difr -= difr_offset;
    --dsigma;
    --work;

    /* Function Body */

    /* Test the input parameters. */
    *info = 0;

    if (*icompq < 0 || *icompq > 1) {
	*info = -1;
    } else if (*k < 1) {
	*info = -2;
    } else if (*lddifr < *k) {
	*info = -9;
    }
    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("DLASD8", &i__1);
	return 0;
    }

    /* Quick return if possible */
    if (*k == 1) {
	d__[1] = abs(z__[1]);
	difl[1] = d__[1];
	if (*icompq == 1) {
	    /* NOTE(review): DIFL(2) is written although DIFL is documented
	       with dimension K (= 1 here); confirm that callers provide
	       the extra element. */
	    difl[2] = 1.;
	    difr_ref(1, 2) = 1.;
	}
	return 0;
    }

    /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can
       be computed with high relative accuracy (barring over/underflow).
       This is a problem on machines without a guard digit in
       add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
       The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I),
       which on any of these machines zeros out the bottommost bit of
       DSIGMA(I) if it is 1; this makes the subsequent subtractions
       DSIGMA(I)-DSIGMA(J) unproblematic when cancellation occurs.  On
       binary machines with a guard digit (almost all machines) it does
       not change DSIGMA(I) at all.  On hexadecimal and decimal machines
       with a guard digit, it slightly changes the bottommost bits of
       DSIGMA(I).  It does not account for hexadecimal or decimal
       machines without guard digits (we know of none).  A subroutine
       call is used to compute 2*DSIGMA(I) to prevent optimizing
       compilers from eliminating this code. */
    latime_1.ops += (doublereal) (*k << 1);
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	dsigma[i__] = dlamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__];
    }

    /* Book keeping: WORK is used as three consecutive segments of length
       K starting at IWK1, IWK2 and IWK3.  IWK2I and IWK3I are the
       corresponding zero-based offsets, so WORK(IWK2I + I) is the I-th
       element of the second segment. */
    iwk1 = 1;
    iwk2 = iwk1 + *k;
    iwk3 = iwk2 + *k;
    iwk2i = iwk2 - 1;
    iwk3i = iwk3 - 1;

    /* Normalize Z.  RHO holds the 2-norm of Z for the scaling call and
       is then squared. */
    latime_1.ops += (doublereal) (*k * 3 + 1);
    rho = dnrm2_(k, &z__[1], &c__1);
    dlascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info);
    rho *= rho;

    /* Initialize WORK(IWK3). */
    dlaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k);

    /* Compute the updated singular values, the arrays DIFL, DIFR,
       and the updated Z. */
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	dlasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info);

	/* If the root finder fails, the computation is terminated. */
	if (*info != 0) {
	    return 0;
	}
	latime_1.ops += 2.;
	/* Fold the J-th root's contribution into the running product kept
	   in the third WORK segment; DIFL and DIFR are taken from the
	   first WORK segment filled by DLASD4. */
	work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j];
	difl[j] = -work[j];
	difr_ref(j, 1) = -work[j + 1];
	latime_1.ops += (doublereal) ((j - 1) * 6);
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]);
	}
	latime_1.ops += (doublereal) ((*k - j) * 6);
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]);
	}
    }

    /* Compute updated Z: the square root of the magnitude of the
       accumulated product, carrying the sign of the old Z(I). */
    latime_1.ops += (doublereal) (*k);
    i__1 = *k;
    for (i__ = 1; i__ <= i__1; ++i__) {
	d__2 = sqrt((d__1 = work[iwk3i + i__], abs(d__1)));
	z__[i__] = d_sign(&d__2, &z__[i__]);
    }

    /* Update VF and VL. */
    i__1 = *k;
    for (j = 1; j <= i__1; ++j) {
	diflj = difl[j];
	dj = d__[j];
	dsigj = -dsigma[j];
	/* DIFRJ and DSIGJP are only refreshed for J < K; for J = K the
	   loop that uses them (I = J+1..K) is empty. */
	if (j < *k) {
	    difrj = -difr_ref(j, 1);
	    dsigjp = -dsigma[j + 1];
	}
	latime_1.ops += 3.;
	work[j] = -z__[j] / diflj / (dsigma[j] + dj);
	latime_1.ops += (doublereal) ((j - 1) * 5);
	i__2 = j - 1;
	for (i__ = 1; i__ <= i__2; ++i__) {
	    /* DLAMC3 forms DSIGMA(I) - DSIGMA(J) through a function call. */
	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj);
	}
	latime_1.ops += (doublereal) ((*k - j) * 5);
	i__2 = *k;
	for (i__ = j + 1; i__ <= i__2; ++i__) {
	    /* DLAMC3 forms DSIGMA(I) - DSIGMA(J+1) through a function call. */
	    work[i__] = z__[i__] / (dlamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj);
	}
	latime_1.ops += (doublereal) (*k * 6);
	/* TEMP is the 2-norm of the vector just built in WORK(1:K); the
	   dot products with VF and VL are divided by it, and it is stored
	   in DIFR(J,2) when ICOMPQ = 1. */
	temp = dnrm2_(k, &work[1], &c__1);
	work[iwk2i + j] = ddot_(k, &work[1], &c__1, &vf[1], &c__1) / temp;
	work[iwk3i + j] = ddot_(k, &work[1], &c__1, &vl[1], &c__1) / temp;
	if (*icompq == 1) {
	    difr_ref(j, 2) = temp;
	}
    }

    /* Copy the new values from the second and third WORK segments back
       into VF and VL. */
    dcopy_(k, &work[iwk2], &c__1, &vf[1], &c__1);
    dcopy_(k, &work[iwk3], &c__1, &vl[1], &c__1);

    return 0;

/*     End of DLASD8 */

} /* dlasd8_ */