int slamc4_(int *emin, float *start, int *base)
{
    /* System generated locals */
    int i__1;
    float r__1;

    /* Local variables */
    static float zero, a;
    static int i;
    static float rbase, b1, b2, c1, c2, d1, d2;
    extern double slamc3_(float *, float *);
    static float one;

    a = *start;
    one = 1.f;
    rbase = one / *base;
    zero = 0.f;
    *emin = 1;
    r__1 = a * rbase;
    b1 = slamc3_(&r__1, &zero);
    c1 = a;
    c2 = a;
    d1 = a;
    d2 = a;
/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.
   $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
L10:
    if (c1 == a && c2 == a && d1 == a && d2 == a) {
        --(*emin);
        a = b1;
        r__1 = a / *base;
        b1 = slamc3_(&r__1, &zero);
        r__1 = b1 * *base;
        c1 = slamc3_(&r__1, &zero);
        d1 = zero;
        i__1 = *base;
        for (i = 1; i <= *base; ++i) {
            d1 += b1;
/* L20: */
        }
        r__1 = a * rbase;
        b2 = slamc3_(&r__1, &zero);
        r__1 = b2 / rbase;
        c2 = slamc3_(&r__1, &zero);
        d2 = zero;
        i__1 = *base;
        for (i = 1; i <= *base; ++i) {
            d2 += b2;
/* L30: */
        }
        goto L10;
    }
/* +    END WHILE */

    return 0;

/*     End of SLAMC4 */

} /* slamc4_ */
/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= SLAMC4 is a service routine for SLAMC2. Arguments ========= EMIN (output) EMIN The minimum exponent before (gradual) underflow, computed by setting A = START and dividing by BASE until the previous A can not be recovered. START (input) REAL The starting point for determining EMIN. BASE (input) INTEGER The base of the machine. ===================================================================== */ /* System generated locals */ integer i__1; real r__1; /* Local variables */ static real zero, a; static integer i; static real rbase, b1, b2, c1, c2, d1, d2; extern doublereal slamc3_(real *, real *); static real one; a = *start; one = 1.f; rbase = one / *base; zero = 0.f; *emin = 1; r__1 = a * rbase; b1 = slamc3_(&r__1, &zero); c1 = a; c2 = a; d1 = a; d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ L10: if (c1 == a && c2 == a && d1 == a && d2 == a) { --(*emin); a = b1; r__1 = a / *base; b1 = slamc3_(&r__1, &zero); r__1 = b1 * *base; c1 = slamc3_(&r__1, &zero); d1 = zero; i__1 = *base; for (i = 1; i <= *base; ++i) { d1 += b1; /* L20: */ } r__1 = a * rbase; b2 = slamc3_(&r__1, &zero); r__1 = b2 / rbase; c2 = slamc3_(&r__1, &zero); d2 = zero; i__1 = *base; for (i = 1; i <= *base; ++i) { d2 += b2; /* L30: */ } goto L10; } /* + END WHILE */ return 0; /* End of SLAMC4 */ } /* slamc4_ */
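/* Illustrative sketch (not part of LAPACK): the probe that SLAMC4 above
   performs, reduced to its core.  Starting from START, keep dividing by the
   base and check whether multiplying back recovers the previous value; the
   first failure marks (gradual) underflow.  The volatile temporaries play the
   role of SLAMC3, forcing every intermediate through a single-precision store
   so extended registers cannot hide the underflow.  SLAMC4 itself also
   re-checks recovery by repeated addition (the D1/D2 sums) to tell gradual
   from abrupt underflow; that refinement is omitted here.  probe_emin is a
   hypothetical name. */
static int probe_emin(float start, int base)
{
    volatile float a = start, b, c;
    int emin = 1;

    for (;;) {
        b = a / base;             /* candidate next, smaller magnitude value */
        c = b * base;             /* try to recover the previous value       */
        if (c != a || b == 0.f)   /* recovery failed: underflow reached      */
            return emin;
        a = b;
        --emin;
    }
}
/* e.g. probe_emin(1.f, 2) walks 1, 1/2, 1/4, ... down into the subnormals on
   an IEEE-754 machine before the recovery test finally fails. */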
/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * eps, integer *emin, real *rmin, integer *emax, real *rmax) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= SLAMC2 determines the machine parameters specified in its argument list. Arguments ========= BETA (output) INTEGER The base of the machine. T (output) INTEGER The number of ( BETA ) digits in the mantissa. RND (output) LOGICAL Specifies whether proper rounding ( RND = .TRUE. ) or chopping ( RND = .FALSE. ) occurs in addition. This may not be a reliable guide to the way in which the machine performs its arithmetic. EPS (output) REAL The smallest positive number such that fl( 1.0 - EPS ) .LT. 1.0, where fl denotes the computed value. EMIN (output) INTEGER The minimum exponent before (gradual) underflow occurs. RMIN (output) REAL The smallest normalized number for the machine, given by BASE**( EMIN - 1 ), where BASE is the floating point value of BETA. EMAX (output) INTEGER The maximum exponent before overflow occurs. RMAX (output) REAL The largest positive number for the machine, given by BASE**EMAX * ( 1 - EPS ), where BASE is the floating point value of BETA. Further Details =============== The computation of EPS is based on a routine PARANOIA by W. Kahan of the University of California at Berkeley. ===================================================================== */ /* Table of constant values */ static integer c__1 = 1; /* Initialized data */ static logical first = TRUE_; static logical iwarn = FALSE_; /* System generated locals */ integer i__1; real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double pow_ri(real *, integer *); /* Local variables */ static logical ieee; static real half; static logical lrnd; static real leps, zero, a, b, c; static integer i, lbeta; static real rbase; static integer lemin, lemax, gnmin; static real small; static integer gpmin; static real third, lrmin, lrmax, sixth; static logical lieee1; extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, logical *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slamc4_(integer *, real *, integer *), slamc5_(integer *, integer *, integer *, logical *, integer *, real *); static integer lt, ngnmin, ngpmin; static real one, two; if (first) { first = FALSE_; zero = 0.f; one = 1.f; two = 2.f; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of BETA, T, RND, EPS, EMIN and RMIN. Throughout this routine we use the function SLAMC3 to ens ure that relevant values are stored and not held in registers, or are not affected by optimizers. SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ slamc1_(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (real) lbeta; i__1 = -lt; a = pow_ri(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct E PS. 
*/ b = two / 3; half = one / 2; r__1 = -(doublereal)half; sixth = slamc3_(&b, &r__1); third = slamc3_(&sixth, &sixth); r__1 = -(doublereal)half; b = slamc3_(&third, &r__1); b = slamc3_(&b, &sixth); b = dabs(b); if (b < leps) { b = leps; } leps = 1.f; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; r__1 = half * leps; /* Computing 5th power */ r__3 = two, r__4 = r__3, r__3 *= r__3; /* Computing 2nd power */ r__5 = leps; r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); c = slamc3_(&r__1, &r__2); r__1 = -(doublereal)c; c = slamc3_(&half, &r__1); b = slamc3_(&half, &c); r__1 = -(doublereal)b; c = slamc3_(&half, &r__1); b = slamc3_(&half, &c); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3 )). Keep dividing A by BETA until (gradual) underflow occurs. T his is detected when we cannot recover the previous A. */ rbase = one / lbeta; small = one; for (i = 1; i <= 3; ++i) { r__1 = small * rbase; small = slamc3_(&r__1, &zero); /* L20: */ } a = slamc3_(&one, &small); slamc4_(&ngpmin, &one, &lbeta); r__1 = -(doublereal)one; slamc4_(&ngnmin, &r__1, &lbeta); slamc4_(&gpmin, &a, &lbeta); r__1 = -(doublereal)a; slamc4_(&gnmin, &r__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual under flow; e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual und erflow; e.g., IEEE standard followers ) */ } else { lemin = min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { lemin = max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow ; e.g., CYBER 205 ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - min(ngpmin,ngnmin) == 3) { lemin = max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflo w; no known machine ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); lemin = min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } /* ** Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; printf("\n\n WARNING. The value EMIN may be incorrect:- "); printf("EMIN = %8i\n",lemin); printf("If, after inspection, the value EMIN looks acceptable"); printf("please comment out \n the IF block as marked within the"); printf("code of routine SLAMC2, \n otherwise supply EMIN"); printf("explicitly.\n"); } /* ** Assume IEEE arithmetic if we found denormalised numbers abo ve, or if arithmetic seems to round in the IEEE style, determi ned in routine SLAMC1. A true IEEE machine should have both thi ngs true; however, faulty machines may have one or the other. */ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could comp ute RMIN as BASE**( EMIN - 1 ), but some machines underflow dur ing this computation. */ lrmin = 1.f; i__1 = 1 - lemin; for (i = 1; i <= 1-lemin; ++i) { r__1 = lrmin * rbase; lrmin = slamc3_(&r__1, &zero); /* L30: */ } /* Finally, call SLAMC5 to compute EMAX and RMAX. 
*/ slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax); } *beta = lbeta; *t = lt; *rnd = lrnd; *eps = leps; *emin = lemin; *rmin = lrmin; *emax = lemax; *rmax = lrmax; return 0; /* End of SLAMC2 */ } /* slamc2_ */
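/* Illustrative sketch (not part of LAPACK): a hypothetical one-off driver
   that calls slamc2_ above and lists the values it should reproduce on an
   IEEE-754 single-precision machine, next to the <float.h> constants.  It
   assumes the usual f2c typedefs (integer, real, logical) from f2c.h. */
#include <stdio.h>
#include <float.h>
static void print_machine_params(void)
{
    integer beta, t, emin, emax;
    logical rnd;
    real eps, rmin, rmax;

    slamc2_(&beta, &t, &rnd, &eps, &emin, &rmin, &emax, &rmax);
    /* Expected on IEEE single: beta = 2, t = 24 = FLT_MANT_DIG,
       eps = 2**(-24) = FLT_EPSILON/2 (rounding arithmetic),
       emin = -125 = FLT_MIN_EXP,  rmin = FLT_MIN,
       emax = 128 = FLT_MAX_EXP,   rmax = FLT_MAX. */
    printf("beta=%ld t=%ld rnd=%ld eps=%g emin=%ld rmin=%g emax=%ld rmax=%g\n",
           (long)beta, (long)t, (long)rnd, (double)eps,
           (long)emin, (double)rmin, (long)emax, (double)rmax);
}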
/* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= SLAMC1 determines the machine parameters given by BETA, T, RND, and IEEE1. Arguments ========= BETA (output) INTEGER The base of the machine. T (output) INTEGER The number of ( BETA ) digits in the mantissa. RND (output) LOGICAL Specifies whether proper rounding ( RND = .TRUE. ) or chopping ( RND = .FALSE. ) occurs in addition. This may not be a reliable guide to the way in which the machine performs its arithmetic. IEEE1 (output) LOGICAL Specifies whether rounding appears to be done in the IEEE 'round to nearest' style. Further Details =============== The routine is based on the routine ENVRON by Malcolm and incorporates suggestions by Gentleman and Marovich. See Malcolm M. A. (1972) Algorithms to reveal properties of floating-point arithmetic. Comms. of the ACM, 15, 949-951. Gentleman W. M. and Marovich S. B. (1974) More on algorithms that reveal properties of floating point arithmetic units. Comms. of the ACM, 17, 276-277. ===================================================================== */ /* Initialized data */ static logical first = TRUE_; /* System generated locals */ real r__1, r__2; /* Local variables */ static logical lrnd; static real a, b, c, f; static integer lbeta; static real savec; static logical lieee1; static real t1, t2; extern doublereal slamc3_(real *, real *); static integer lt; static real one, qtr; if (first) { first = FALSE_; one = 1.f; /* LBETA, LIEEE1, LT and LRND are the local values of BE TA, IEEE1, T and RND. Throughout this routine we use the function SLAMC3 to ens ure that relevant values are stored and not held in registers, or are not affected by optimizers. Compute a = 2.0**m with the smallest positive integer m s uch that fl( a + 1.0 ) = a. */ a = 1.f; c = 1.f; /* + WHILE( C.EQ.ONE )LOOP */ L10: if (c == one) { a *= 2; c = slamc3_(&a, &one); r__1 = -(doublereal)a; c = slamc3_(&c, &r__1); goto L10; } /* + END WHILE Now compute b = 2.0**m with the smallest positive integer m such that fl( a + b ) .gt. a. */ b = 1.f; c = slamc3_(&a, &b); /* + WHILE( C.EQ.A )LOOP */ L20: if (c == a) { b *= 2; c = slamc3_(&a, &b); goto L20; } /* + END WHILE Now compute the base. a and c are neighbouring floating po int numbers in the interval ( beta**t, beta**( t + 1 ) ) and so their difference is beta. Adding 0.25 to c is to ensure that it is truncated to beta and not ( beta - 1 ). */ qtr = one / 4; savec = c; r__1 = -(doublereal)a; c = slamc3_(&c, &r__1); lbeta = c + qtr; /* Now determine whether rounding or chopping occurs, by addin g a bit less than beta/2 and a bit more than beta/2 to a. */ b = (real) lbeta; r__1 = b / 2; r__2 = -(doublereal)b / 100; f = slamc3_(&r__1, &r__2); c = slamc3_(&f, &a); if (c == a) { lrnd = TRUE_; } else { lrnd = FALSE_; } r__1 = b / 2; r__2 = b / 100; f = slamc3_(&r__1, &r__2); c = slamc3_(&f, &a); if (lrnd && c == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to nearest' style. B/2 is half a unit in the last place of the two numbers A and SAVEC. Furthermore, A is even, i.e. has last bit zero, and SAVEC is odd. Thus adding B/2 to A should not cha nge A, but adding B/2 to SAVEC should change SAVEC. 
*/ r__1 = b / 2; t1 = slamc3_(&r__1, &a); r__1 = b / 2; t2 = slamc3_(&r__1, &savec); lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the integer part of log to the base beta of a, however it is safer to determine t by powering. So we find t as the smallest positive integer for which fl( beta**t + 1.0 ) = 1.0. */ lt = 0; a = 1.f; c = 1.f; /* + WHILE( C.EQ.ONE )LOOP */ L30: if (c == one) { ++lt; a *= lbeta; c = slamc3_(&a, &one); r__1 = -(doublereal)a; c = slamc3_(&c, &r__1); goto L30; } /* + END WHILE */ } *beta = lbeta; *t = lt; *rnd = lrnd; *ieee1 = lieee1; return 0; /* End of SLAMC1 */ } /* slamc1_ */
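/* Illustrative sketch (not part of LAPACK): the core of Malcolm's method as
   used by SLAMC1 above, written with volatile temporaries instead of SLAMC3
   calls.  It finds a = 2**m with fl(a+1) == a, then the smallest power of two
   b with fl(a+b) > a; since a and fl(a+b) are neighbouring floats in
   (beta**t, beta**(t+1)), their difference is the base.  The rounding-mode
   detection done by SLAMC1 is omitted.  probe_base_and_digits is a
   hypothetical name. */
static void probe_base_and_digits(int *beta, int *t)
{
    volatile float a = 1.f, b = 1.f, c;

    /* find a = 2**m such that fl(a + 1) == a */
    do { a *= 2.f; c = a + 1.f; c = c - a; } while (c == 1.f);
    /* find the smallest power of two b with fl(a + b) != a */
    do { b *= 2.f; c = a + b; } while (c == a);
    *beta = (int)(c - a + .25f);    /* adding 1/4 guards against truncation
                                       to beta - 1 on chopping machines    */
    /* t = smallest integer with fl(beta**t + 1) == beta**t */
    a = 1.f;
    c = 1.f;
    *t = 0;
    while (c == 1.f) {
        ++(*t);
        a *= (float) *beta;
        c = a + 1.f;
        c = c - a;
    }
}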
/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, real *dsigma, real *work, integer *info) { /* System generated locals */ integer difr_dim1, difr_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); static integer iwk2i, iwk3i; extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j; static real diflj, difrj, dsigj; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, real *, real *, real *, real *, integer *); static real dj; extern /* Subroutine */ int xerbla_(char *, integer *); static real dsigjp; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static real rho; static integer iwk1, iwk2, iwk3; #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] /* -- LAPACK auxiliary routine (instrumented to count ops, version 3.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University June 30, 1999 Purpose ======= SLASD8 finds the square roots of the roots of the secular equation, as defined by the values in DSIGMA and Z. It makes the appropriate calls to SLASD4, and stores, for each element in D, the distance to its two nearest poles (elements in DSIGMA). It also updates the arrays VF and VL, the first and last components of all the right singular vectors of the original bidiagonal matrix. SLASD8 is called from SLASD6. Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in factored form in the calling routine: = 0: Compute singular values only. = 1: Compute singular vectors in factored form as well. K (input) INTEGER The number of terms in the rational function to be solved by SLASD4. K >= 1. D (output) REAL array, dimension ( K ) On output, D contains the updated singular values. Z (input) REAL array, dimension ( K ) The first K elements of this array contain the components of the deflation-adjusted updating row vector. VF (input/output) REAL array, dimension ( K ) On entry, VF contains information passed through DBEDE8. On exit, VF contains the first K components of the first components of all right singular vectors of the bidiagonal matrix. VL (input/output) REAL array, dimension ( K ) On entry, VL contains information passed through DBEDE8. On exit, VL contains the first K components of the last components of all right singular vectors of the bidiagonal matrix. DIFL (output) REAL array, dimension ( K ) On exit, DIFL(I) = D(I) - DSIGMA(I). DIFR (output) REAL array, dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and dimension ( K ) if ICOMPQ = 0. On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not defined and will not be referenced. If ICOMPQ = 1, DIFR(1:K,2) is an array containing the normalizing factors for the right singular vector matrix. LDDIFR (input) INTEGER The leading dimension of DIFR, must be at least K. DSIGMA (input) REAL array, dimension ( K ) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. 
WORK (workspace) REAL array, dimension at least 3 * K INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an singular value did not converge Further Details =============== Based on contributions by Ming Gu and Huan Ren, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters. Parameter adjustments */ --d__; --z__; --vf; --vl; --difl; difr_dim1 = *lddifr; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; --dsigma; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*k < 1) { *info = -2; } else if (*lddifr < *k) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD8", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { d__[1] = dabs(z__[1]); difl[1] = d__[1]; if (*icompq == 1) { difl[2] = 1.f; difr_ref(1, 2) = 1.f; } return 0; } /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can be computed with high relative accuracy (barring over/underflow). This is a problem on machines without a guard digit in add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), which on any of these machines zeros out the bottommost bit of DSIGMA(I) if it is 1; this makes the subsequent subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation occurs. On binary machines with a guard digit (almost all machines) it does not change DSIGMA(I) at all. On hexadecimal and decimal machines with a guard digit, it slightly changes the bottommost bits of DSIGMA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute 2*DLAMBDA(I) to prevent optimizing compilers from eliminating this code. */ latime_1.ops += (real) (*k << 1); i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L10: */ } /* Book keeping. */ iwk1 = 1; iwk2 = iwk1 + *k; iwk3 = iwk2 + *k; iwk2i = iwk2 - 1; iwk3i = iwk3 - 1; /* Normalize Z. */ latime_1.ops += (real) (*k * 3 + 1); rho = snrm2_(k, &z__[1], &c__1); slascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info); rho *= rho; /* Initialize WORK(IWK3). */ slaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k); /* Compute the updated singular values, the arrays DIFL, DIFR, and the updated Z. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info); /* If the root finder fails, the computation is terminated. */ if (*info != 0) { return 0; } latime_1.ops += 2.f; work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; difl[j] = -work[j]; difr_ref(j, 1) = -work[j + 1]; latime_1.ops += (real) ((j - 1) * 6); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L20: */ } latime_1.ops += (real) ((*k - j) * 6); i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L30: */ } /* L40: */ } /* Compute updated Z. */ latime_1.ops += (real) (*k); i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); z__[i__] = r_sign(&r__2, &z__[i__]); /* L50: */ } /* Update VF and VL. 
*/ i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = d__[j]; dsigj = -dsigma[j]; if (j < *k) { difrj = -difr_ref(j, 1); dsigjp = -dsigma[j + 1]; } latime_1.ops += 3.f; work[j] = -z__[j] / diflj / (dsigma[j] + dj); latime_1.ops += (real) ((j - 1) * 5); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj); /* L60: */ } latime_1.ops += (real) ((*k - j) * 5); i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj); /* L70: */ } latime_1.ops += (real) (*k * 6); temp = snrm2_(k, &work[1], &c__1); work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; if (*icompq == 1) { difr_ref(j, 2) = temp; } /* L80: */ } scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); return 0; /* End of SLASD8 */ } /* slasd8_ */
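/* Illustrative sketch (not part of LAPACK): the "X = 2*X - X through an
   opaque call" idiom that SLASD8 above (and SLAED3 further down) apply to
   DSIGMA/DLAMDA.  On binary machines with a guard digit the statement is a
   no-op; on the Cray-style machines described in the comments it zeros the
   bottommost bit so later subtractions cancel cleanly.  Routing the sum
   through a separate, non-inlined function is what stops an optimizing
   compiler from folding the expression away.  Both names are hypothetical. */
static float opaque_add(const float *a, const float *b)
{
    volatile float s = *a + *b;   /* stand-in for SLAMC3: force the store */
    return s;
}

static void knock_off_last_bit(float *x, int n)
{
    int i;
    for (i = 0; i < n; ++i)
        x[i] = opaque_add(&x[i], &x[i]) - x[i];   /* 2*x[i] - x[i] */
}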
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info) { /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; real r__1; /* Local variables */ integer i__, j, m, n; real dj; integer nlp1; real temp; real diflj, difrj, dsigj; real dsigjp; /* -- LAPACK routine (version 3.2) -- */ /* November 2006 */ /* Purpose */ /* ======= */ /* SLALS0 applies back the multiplying factors of either the left or the */ /* right singular vector matrix of a diagonal matrix appended by a row */ /* to the right hand side matrix B in solving the least squares problem */ /* using the divide-and-conquer SVD approach. */ /* For the left singular vector matrix, three types of orthogonal */ /* matrices are involved: */ /* (1L) Givens rotations: the number of such rotations is GIVPTR; the */ /* pairs of columns/rows they were applied to are stored in GIVCOL; */ /* and the C- and S-values of these rotations are stored in GIVNUM. */ /* (2L) Permutation. The (NL+1)-st row of B is to be moved to the first */ /* row, and for J=2:N, PERM(J)-th row of B is to be moved to the */ /* J-th row. */ /* (3L) The left singular vector matrix of the remaining matrix. */ /* For the right singular vector matrix, four types of orthogonal */ /* matrices are involved: */ /* (1R) The right singular vector matrix of the remaining matrix. */ /* (2R) If SQRE = 1, one extra Givens rotation to generate the right */ /* null space. */ /* (3R) The inverse transformation of (2L). */ /* (4R) The inverse transformation of (1L). */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form: */ /* = 0: Left singular vector matrix. */ /* = 1: Right singular vector matrix. */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has row dimension N = NL + NR + 1, */ /* and column dimension M = N + SQRE. */ /* NRHS (input) INTEGER */ /* The number of columns of B and BX. NRHS must be at least 1. */ /* B (input/output) REAL array, dimension ( LDB, NRHS ) */ /* On input, B contains the right hand sides of the least */ /* squares problem in rows 1 through M. On output, B contains */ /* the solution X in rows 1 through N. */ /* LDB (input) INTEGER */ /* The leading dimension of B. LDB must be at least */ /* max(1,MAX( M, N ) ). */ /* BX (workspace) REAL array, dimension ( LDBX, NRHS ) */ /* LDBX (input) INTEGER */ /* The leading dimension of BX. */ /* PERM (input) INTEGER array, dimension ( N ) */ /* The permutations (from deflation and sorting) applied */ /* to the two blocks. */ /* GIVPTR (input) INTEGER */ /* The number of Givens rotations which took place in this */ /* subproblem. */ /* GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) */ /* Each pair of numbers indicates a pair of rows/columns */ /* involved in a Givens rotation. 
*/ /* LDGCOL (input) INTEGER */ /* The leading dimension of GIVCOL, must be at least N. */ /* GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) */ /* Each number indicates the C or S value used in the */ /* corresponding Givens rotation. */ /* LDGNUM (input) INTEGER */ /* The leading dimension of arrays DIFR, POLES and */ /* GIVNUM, must be at least K. */ /* POLES (input) REAL array, dimension ( LDGNUM, 2 ) */ /* On entry, POLES(1:K, 1) contains the new singular */ /* values obtained from solving the secular equation, and */ /* POLES(1:K, 2) is an array containing the poles in the secular */ /* equation. */ /* DIFL (input) REAL array, dimension ( K ). */ /* On entry, DIFL(I) is the distance between I-th updated */ /* (undeflated) singular value and the I-th (undeflated) old */ /* singular value. */ /* DIFR (input) REAL array, dimension ( LDGNUM, 2 ). */ /* On entry, DIFR(I, 1) contains the distances between I-th */ /* updated (undeflated) singular value and the I+1-th */ /* (undeflated) old singular value. And DIFR(I, 2) is the */ /* normalizing factor for the I-th right singular vector. */ /* Z (input) REAL array, dimension ( K ) */ /* Contain the components of the deflation-adjusted updating row */ /* vector. */ /* K (input) INTEGER */ /* Contains the dimension of the non-deflated matrix, */ /* This is the order of the related secular equation. 1 <= K <=N. */ /* C (input) REAL */ /* C contains garbage if SQRE =0 and the C-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* S (input) REAL */ /* S contains garbage if SQRE =0 and the S-value of a Givens */ /* rotation related to the right null space if SQRE = 1. */ /* WORK (workspace) REAL array, dimension ( K ) */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Ren-Cang Li, Computer Science Division, University of */ /* California at Berkeley, USA */ /* Osni Marques, LBNL/NERSC, USA */ /* ===================================================================== */ /* Test the input parameters. */ /* Parameter adjustments */ b_dim1 = *ldb; b_offset = 1 + b_dim1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { *info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. */ /* Step (1L): apply back the Givens rotations performed. 
*/ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &givnum[i__ + givnum_dim1]); } /* Step (2L): permute rows of B. */ scopy_(nrhs, &b[nlp1 + b_dim1], ldb, &bx[bx_dim1 + 1], ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &b[perm[i__] + b_dim1], ldb, &bx[i__ + bx_dim1], ldbx); } /* Step (3L): apply the inverse of the left singular vector */ /* matrix to BX. */ if (*k == 1) { scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { sscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles[j + poles_dim1]; dsigj = -poles[j + (poles_dim1 << 1)]; if (j < *k) { difrj = -difr[j + difr_dim1]; dsigjp = -poles[j + 1 + (poles_dim1 << 1)]; } if (z__[j] == 0.f || poles[j + (poles_dim1 << 1)] == 0.f) { work[j] = 0.f; } else { work[j] = -poles[j + (poles_dim1 << 1)] * z__[j] / diflj / (poles[j + (poles_dim1 << 1)] + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 0.f) { work[i__] = 0.f; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigj) - diflj) / (poles[i__ + (poles_dim1 << 1)] + dj); } } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles[i__ + (poles_dim1 << 1)] == 0.f) { work[i__] = 0.f; } else { work[i__] = poles[i__ + (poles_dim1 << 1)] * z__[i__] / (slamc3_(&poles[i__ + (poles_dim1 << 1)], & dsigjp) + difrj) / (poles[i__ + (poles_dim1 << 1)] + dj); } } work[1] = -1.f; temp = snrm2_(k, &work[1], &c__1); sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b13, &b[j + b_dim1], ldb); slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b[j + b_dim1], ldb, info); } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &bx[*k + 1 + bx_dim1], ldbx, &b[*k + 1 + b_dim1], ldb); } } else { /* Apply back the right orthogonal transformations. */ /* Step (1R): apply back the new right singular vector matrix */ /* to B. */ if (*k == 1) { scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles[j + (poles_dim1 << 1)]; if (z__[j] == 0.f) { work[j] = 0.f; } else { work[j] = -z__[j] / difl[j] / (dsigj + poles[j + poles_dim1]) / difr[j + (difr_dim1 << 1)]; } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles[i__ + 1 + (poles_dim1 << 1)]; work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr[ i__ + difr_dim1]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles[i__ + (poles_dim1 << 1)]; work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles[i__ + poles_dim1]) / difr[i__ + (difr_dim1 << 1)]; } } sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & c__1, &c_b13, &bx[j + bx_dim1], ldbx); } } /* Step (2R): if SQRE = 1, apply back the rotation that is */ /* related to the right null space of the subproblem. 
*/ if (*sqre == 1) { scopy_(nrhs, &b[m + b_dim1], ldb, &bx[m + bx_dim1], ldbx); srot_(nrhs, &bx[bx_dim1 + 1], ldbx, &bx[m + bx_dim1], ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &b[*k + 1 + b_dim1], ldb, &bx[*k + 1 + bx_dim1], ldbx); } /* Step (3R): permute rows of B. */ scopy_(nrhs, &bx[bx_dim1 + 1], ldbx, &b[nlp1 + b_dim1], ldb); if (*sqre == 1) { scopy_(nrhs, &bx[m + bx_dim1], ldbx, &b[m + b_dim1], ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &bx[i__ + bx_dim1], ldbx, &b[perm[i__] + b_dim1], ldb); } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum[i__ + givnum_dim1]; srot_(nrhs, &b[givcol[i__ + (givcol_dim1 << 1)] + b_dim1], ldb, & b[givcol[i__ + givcol_dim1] + b_dim1], ldb, &givnum[i__ + (givnum_dim1 << 1)], &r__1); } } return 0; /* End of SLALS0 */ } /* slals0_ */
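/* Illustrative sketch (not part of LAPACK): what a single Givens rotation in
   step (1L) does to a pair of rows of B, using the same convention as the
   SROT calls above, and why step (4R) can undo the whole sequence by
   replaying the rotations in reverse order with the sign of S flipped (the
   inverse of an orthogonal rotation is its transpose).  apply_givens is a
   hypothetical helper, not a LAPACK routine. */
static void apply_givens(float *row1, float *row2, int nrhs, float c, float s)
{
    int j;
    for (j = 0; j < nrhs; ++j) {
        float t1 = row1[j], t2 = row2[j];
        row1[j] = c * t1 + s * t2;
        row2[j] = c * t2 - s * t1;
    }
}
/* Undo: apply_givens(row1, row2, nrhs, c, -s); applied after the forward
   rotation this restores row1 and row2 exactly (up to roundoff). */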
/*< SUBROUTINE SLAMC2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) >*/ /* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * eps, integer *emin, real *rmin, integer *emax, real *rmax) { /* Initialized data */ static logical first = TRUE_; /* runtime-initialized constant */ static logical iwarn = FALSE_; /* runtime-initialized constant */ /* System generated locals */ integer i__1; real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double pow_ri(real *, integer *); /* Local variables */ real a, b, c__; integer i__; static integer lt; /* runtime-initialized constant */ real one, two; logical ieee; real half; logical lrnd = 0; //variable 'lrnd' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] static real leps; /* runtime-initialized constant */ real zero; static integer lbeta; /* runtime-initialized constant */ real rbase; static integer lemin, lemax; /* runtime-initialized constant */ integer gnmin; real small; integer gpmin; real third; static real lrmin, lrmax; /* runtime-initialized constant */ real sixth; logical lieee1; extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, logical *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slamc4_(integer *, real *, integer *), slamc5_(integer *, integer *, integer *, logical *, integer *, real *); integer ngnmin, ngpmin; /* -- LAPACK auxiliary routine (version 1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* October 31, 1992 */ /* .. Scalar Arguments .. */ /*< LOGICAL RND >*/ /*< INTEGER BETA, EMAX, EMIN, T >*/ /*< REAL EPS, RMAX, RMIN >*/ /* .. */ /* Purpose */ /* ======= */ /* SLAMC2 determines the machine parameters specified in its argument */ /* list. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* EPS (output) REAL */ /* The smallest positive number such that */ /* fl( 1.0 - EPS ) .LT. 1.0, */ /* where fl denotes the computed value. */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow occurs. */ /* RMIN (output) REAL */ /* The smallest normalized number for the machine, given by */ /* BASE**( EMIN - 1 ), where BASE is the floating point value */ /* of BETA. */ /* EMAX (output) INTEGER */ /* The maximum exponent before overflow occurs. */ /* RMAX (output) REAL */ /* The largest positive number for the machine, given by */ /* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ /* value of BETA. */ /* Further Details */ /* =============== */ /* The computation of EPS is based on a routine PARANOIA by */ /* W. Kahan of the University of California at Berkeley. */ /* ===================================================================== */ /* .. Local Scalars .. */ /*< LOGICAL FIRST, IEEE, IWARN, LIEEE1, LRND >*/ /*< >*/ /*< >*/ /* .. */ /* .. External Functions .. */ /*< REAL SLAMC3 >*/ /*< EXTERNAL SLAMC3 >*/ /* .. */ /* .. External Subroutines .. */ /*< EXTERNAL SLAMC1, SLAMC4, SLAMC5 >*/ /* .. */ /* .. Intrinsic Functions .. */ /*< INTRINSIC ABS, MAX, MIN >*/ /* .. */ /* .. Save statement .. */ /*< >*/ /* .. */ /* .. Data statements .. 
*/ /*< DATA FIRST / .TRUE. / , IWARN / .FALSE. / >*/ /* .. */ /* .. Executable Statements .. */ /*< IF( FIRST ) THEN >*/ if (first) { /*< FIRST = .FALSE. >*/ first = FALSE_; /*< ZERO = 0 >*/ zero = (float)0.; /*< ONE = 1 >*/ one = (float)1.; /*< TWO = 2 >*/ two = (float)2.; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ /* BETA, T, RND, EPS, EMIN and RMIN. */ /* Throughout this routine we use the function SLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ /*< CALL SLAMC1( LBETA, LT, LRND, LIEEE1 ) >*/ slamc1_(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ /*< B = LBETA >*/ b = (real) lbeta; /*< A = B**( -LT ) >*/ i__1 = -lt; a = pow_ri(&b, &i__1); /*< LEPS = A >*/ leps = a; /* Try some tricks to see whether or not this is the correct EPS. */ /*< B = TWO / 3 >*/ b = two / 3; /*< HALF = ONE / 2 >*/ half = one / 2; /*< SIXTH = SLAMC3( B, -HALF ) >*/ r__1 = -half; sixth = slamc3_(&b, &r__1); /*< THIRD = SLAMC3( SIXTH, SIXTH ) >*/ third = slamc3_(&sixth, &sixth); /*< B = SLAMC3( THIRD, -HALF ) >*/ r__1 = -half; b = slamc3_(&third, &r__1); /*< B = SLAMC3( B, SIXTH ) >*/ b = slamc3_(&b, &sixth); /*< B = ABS( B ) >*/ b = dabs(b); /*< >*/ if (b < leps) { b = leps; } /*< LEPS = 1 >*/ leps = (float)1.; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ /*< 10 CONTINUE >*/ L10: /*< IF( ( LEPS.GT.B ) .AND. ( B.GT.ZERO ) ) THEN >*/ if (leps > b && b > zero) { /*< LEPS = B >*/ leps = b; /*< C = SLAMC3( HALF*LEPS, ( TWO**5 )*( LEPS**2 ) ) >*/ r__1 = half * leps; /* Computing 5th power */ r__3 = two, r__4 = r__3, r__3 *= r__3; /* Computing 2nd power */ r__5 = leps; r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); c__ = slamc3_(&r__1, &r__2); /*< C = SLAMC3( HALF, -C ) >*/ r__1 = -c__; c__ = slamc3_(&half, &r__1); /*< B = SLAMC3( HALF, C ) >*/ b = slamc3_(&half, &c__); /*< C = SLAMC3( HALF, -B ) >*/ r__1 = -b; c__ = slamc3_(&half, &r__1); /*< B = SLAMC3( HALF, C ) >*/ b = slamc3_(&half, &c__); /*< GO TO 10 >*/ goto L10; /*< END IF >*/ } /* + END WHILE */ /*< >*/ if (a < leps) { leps = a; } /* Computation of EPS complete. */ /* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ /* Keep dividing A by BETA until (gradual) underflow occurs. This */ /* is detected when we cannot recover the previous A. */ /*< RBASE = ONE / LBETA >*/ rbase = one / lbeta; /*< SMALL = ONE >*/ small = one; /*< DO 20 I = 1, 3 >*/ for (i__ = 1; i__ <= 3; ++i__) { /*< SMALL = SLAMC3( SMALL*RBASE, ZERO ) >*/ r__1 = small * rbase; small = slamc3_(&r__1, &zero); /*< 20 CONTINUE >*/ /* L20: */ } /*< A = SLAMC3( ONE, SMALL ) >*/ a = slamc3_(&one, &small); /*< CALL SLAMC4( NGPMIN, ONE, LBETA ) >*/ slamc4_(&ngpmin, &one, &lbeta); /*< CALL SLAMC4( NGNMIN, -ONE, LBETA ) >*/ r__1 = -one; slamc4_(&ngnmin, &r__1, &lbeta); /*< CALL SLAMC4( GPMIN, A, LBETA ) >*/ slamc4_(&gpmin, &a, &lbeta); /*< CALL SLAMC4( GNMIN, -A, LBETA ) >*/ r__1 = -a; slamc4_(&gnmin, &r__1, &lbeta); /*< IEEE = .FALSE. >*/ ieee = FALSE_; /*< IF( ( NGPMIN.EQ.NGNMIN ) .AND. ( GPMIN.EQ.GNMIN ) ) THEN >*/ if (ngpmin == ngnmin && gpmin == gnmin) { /*< IF( NGPMIN.EQ.GPMIN ) THEN >*/ if (ngpmin == gpmin) { /*< LEMIN = NGPMIN >*/ lemin = ngpmin; /* ( Non twos-complement machines, no gradual underflow; */ /* e.g., VAX ) */ /*< ELSE IF( ( GPMIN-NGPMIN ).EQ.3 ) THEN >*/ } else if (gpmin - ngpmin == 3) { /*< LEMIN = NGPMIN - 1 + LT >*/ lemin = ngpmin - 1 + lt; /*< IEEE = .TRUE. 
>*/ ieee = TRUE_; /* ( Non twos-complement machines, with gradual underflow; */ /* e.g., IEEE standard followers ) */ /*< ELSE >*/ } else { /*< LEMIN = MIN( NGPMIN, GPMIN ) >*/ lemin = min(ngpmin,gpmin); /* ( A guess; no known machine ) */ /*< IWARN = .TRUE. >*/ iwarn = TRUE_; /*< END IF >*/ } /*< ELSE IF( ( NGPMIN.EQ.GPMIN ) .AND. ( NGNMIN.EQ.GNMIN ) ) THEN >*/ } else if (ngpmin == gpmin && ngnmin == gnmin) { /*< IF( ABS( NGPMIN-NGNMIN ).EQ.1 ) THEN >*/ if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { /*< LEMIN = MAX( NGPMIN, NGNMIN ) >*/ lemin = max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow; */ /* e.g., CYBER 205 ) */ /*< ELSE >*/ } else { /*< LEMIN = MIN( NGPMIN, NGNMIN ) >*/ lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ /*< IWARN = .TRUE. >*/ iwarn = TRUE_; /*< END IF >*/ } /*< >*/ } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) { /*< IF( ( GPMIN-MIN( NGPMIN, NGNMIN ) ).EQ.3 ) THEN >*/ if (gpmin - min(ngpmin,ngnmin) == 3) { /*< LEMIN = MAX( NGPMIN, NGNMIN ) - 1 + LT >*/ lemin = max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflow; */ /* no known machine ) */ /*< ELSE >*/ } else { /*< LEMIN = MIN( NGPMIN, NGNMIN ) >*/ lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ /*< IWARN = .TRUE. >*/ iwarn = TRUE_; /*< END IF >*/ } /*< ELSE >*/ } else { /*< LEMIN = MIN( NGPMIN, NGNMIN, GPMIN, GNMIN ) >*/ /* Computing MIN */ i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); lemin = min(i__1,gnmin); /* ( A guess; no known machine ) */ /*< IWARN = .TRUE. >*/ iwarn = TRUE_; /*< END IF >*/ } /* ** */ /* Comment out this if block if EMIN is ok */ /*< IF( IWARN ) THEN >*/ if (iwarn) { /*< FIRST = .TRUE. >*/ first = TRUE_; /*< WRITE( 6, FMT = 9999 )LEMIN >*/ printf("\n\n WARNING. The value EMIN may be incorrect: - "); printf("EMIN = %8li\n",lemin); printf("If, after inspection, the value EMIN looks acceptable"); printf(" please comment out\n the IF block as marked within the"); printf(" code of routine SLAMC2,\n otherwise supply EMIN"); printf(" explicitly.\n"); /*< END IF >*/ } /* ** */ /* Assume IEEE arithmetic if we found denormalised numbers above, */ /* or if arithmetic seems to round in the IEEE style, determined */ /* in routine SLAMC1. A true IEEE machine should have both things */ /* true; however, faulty machines may have one or the other. */ /*< IEEE = IEEE .OR. LIEEE1 >*/ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could compute */ /* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ /* this computation. */ /*< LRMIN = 1 >*/ lrmin = (float)1.; /*< DO 30 I = 1, 1 - LEMIN >*/ i__1 = 1 - lemin; for (i__ = 1; i__ <= i__1; ++i__) { /*< LRMIN = SLAMC3( LRMIN*RBASE, ZERO ) >*/ r__1 = lrmin * rbase; lrmin = slamc3_(&r__1, &zero); /*< 30 CONTINUE >*/ /* L30: */ } /* Finally, call SLAMC5 to compute EMAX and RMAX. */ /*< CALL SLAMC5( LBETA, LT, LEMIN, IEEE, LEMAX, LRMAX ) >*/ slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); /*< END IF >*/ } /*< BETA = LBETA >*/ *beta = lbeta; /*< T = LT >*/ *t = lt; /*< RND = LRND >*/ *rnd = lrnd; /*< EPS = LEPS >*/ *eps = leps; /*< EMIN = LEMIN >*/ *emin = lemin; /*< RMIN = LRMIN >*/ *rmin = lrmin; /*< EMAX = LEMAX >*/ *emax = lemax; /*< RMAX = LRMAX >*/ *rmax = lrmax; /*< RETURN >*/ return 0; /*< 9 >*/ /* End of SLAMC2 */ /*< END >*/ } /* slamc2_ */
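/* Illustrative sketch (not part of LAPACK): SLAMC2's way of building
   RMIN = BASE**(EMIN-1) by 1-EMIN successive multiplications by 1/BASE,
   rather than by a power routine that might underflow part-way through.
   rmin_by_division and its arguments are hypothetical names. */
static float rmin_by_division(int beta, int emin)
{
    volatile float rmin = 1.f;    /* volatile keeps each step in single precision */
    float rbase = 1.f / (float) beta;
    int i;

    for (i = 1; i <= 1 - emin; ++i)
        rmin = rmin * rbase;
    return rmin;
}
/* e.g. rmin_by_division(2, -125) should yield 2**(-126) = FLT_MIN. */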
/*< SUBROUTINE SLAMC5( BETA, P, EMIN, IEEE, EMAX, RMAX ) >*/ /* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax) { /* System generated locals */ integer i__1; real r__1; /* Local variables */ integer i__; real y, z__; integer try__, lexp; real oldy=0; integer uexp, nbits; extern doublereal slamc3_(real *, real *); real recbas; integer exbits, expsum; /* -- LAPACK auxiliary routine (version 1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* October 31, 1992 */ /* .. Scalar Arguments .. */ /*< LOGICAL IEEE >*/ /*< INTEGER BETA, EMAX, EMIN, P >*/ /*< REAL RMAX >*/ /* .. */ /* Purpose */ /* ======= */ /* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. */ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) REAL */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /*< REAL ZERO, ONE >*/ /*< PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 ) >*/ /* .. */ /* .. Local Scalars .. */ /*< INTEGER EXBITS, EXPSUM, I, LEXP, NBITS, TRY, UEXP >*/ /*< REAL OLDY, RECBAS, Y, Z >*/ /* .. */ /* .. External Functions .. */ /*< REAL SLAMC3 >*/ /*< EXTERNAL SLAMC3 >*/ /* .. */ /* .. Intrinsic Functions .. */ /*< INTRINSIC MOD >*/ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ /* approximately to the bound that is closest to abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ /*< LEXP = 1 >*/ lexp = 1; /*< EXBITS = 1 >*/ exbits = 1; /*< 10 CONTINUE >*/ L10: /*< TRY = LEXP*2 >*/ try__ = lexp << 1; /*< IF( TRY.LE.( -EMIN ) ) THEN >*/ if (try__ <= -(*emin)) { /*< LEXP = TRY >*/ lexp = try__; /*< EXBITS = EXBITS + 1 >*/ ++exbits; /*< GO TO 10 >*/ goto L10; /*< END IF >*/ } /*< IF( LEXP.EQ.-EMIN ) THEN >*/ if (lexp == -(*emin)) { /*< UEXP = LEXP >*/ uexp = lexp; /*< ELSE >*/ } else { /*< UEXP = TRY >*/ uexp = try__; /*< EXBITS = EXBITS + 1 >*/ ++exbits; /*< END IF >*/ } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ /*< IF( ( UEXP+EMIN ).GT.( -LEXP-EMIN ) ) THEN >*/ if (uexp + *emin > -lexp - *emin) { /*< EXPSUM = 2*LEXP >*/ expsum = lexp << 1; /*< ELSE >*/ } else { /*< EXPSUM = 2*UEXP >*/ expsum = uexp << 1; /*< END IF >*/ } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . 
*/ /*< EMAX = EXPSUM + EMIN - 1 >*/ *emax = expsum + *emin - 1; /*< NBITS = 1 + EXBITS + P >*/ nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ /*< IF( ( MOD( NBITS, 2 ).EQ.1 ) .AND. ( BETA.EQ.2 ) ) THEN >*/ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. */ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ /*< EMAX = EMAX - 1 >*/ --(*emax); /*< END IF >*/ } /*< IF( IEEE ) THEN >*/ if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. */ /*< EMAX = EMAX - 1 >*/ --(*emax); /*< END IF >*/ } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ /*< RECBAS = ONE / BETA >*/ recbas = (float)1. / *beta; /*< Z = BETA - ONE >*/ z__ = *beta - (float)1.; /*< Y = ZERO >*/ y = (float)0.; /*< DO 20 I = 1, P >*/ i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { /*< Z = Z*RECBAS >*/ z__ *= recbas; /*< >*/ if (y < (float)1.) { oldy = y; } /*< Y = SLAMC3( Y, Z ) >*/ y = slamc3_(&y, &z__); /*< 20 CONTINUE >*/ /* L20: */ } /*< >*/ if (y >= (float)1.) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ /*< DO 30 I = 1, EMAX >*/ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { /*< Y = SLAMC3( Y*BETA, ZERO ) >*/ r__1 = y * *beta; y = slamc3_(&r__1, &c_b32); /*< 30 CONTINUE >*/ /* L30: */ } /*< RMAX = Y >*/ *rmax = y; /*< RETURN >*/ return 0; /* End of SLAMC5 */ /*< END >*/ } /* slamc5_ */
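/* Illustrative sketch (not part of LAPACK): how SLAMC5 above forms the
   factor (1 - BETA**(-P)) without the partial sums rounding up to exactly
   1.0.  It accumulates the terms (BETA-1)*BETA**(-i) for i = 1..P and falls
   back to the last partial sum that was still below 1 if rounding pushes the
   total to 1.  one_minus_ulp_factor is a hypothetical name. */
static float one_minus_ulp_factor(int beta, int p)
{
    volatile float y = 0.f, z = (float) (beta - 1), oldy = 0.f;
    float recbas = 1.f / (float) beta;
    int i;

    for (i = 1; i <= p; ++i) {
        z *= recbas;              /* (beta - 1) * beta**(-i) */
        if (y < 1.f)
            oldy = y;             /* remember the last sum strictly below 1 */
        y = y + z;
    }
    return (y >= 1.f) ? oldy : y; /* should equal 1 - beta**(-p) */
}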
/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * indx, integer *ctot, real *w, real *s, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static real temp; extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *); static integer n2; extern /* Subroutine */ int slaed4_(integer *, integer *, real *, real *, real *, real *, real *, integer *); extern doublereal slamc3_(real *, real *); static integer n12, ii, n23; extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( char *, integer *, integer *, real *, integer *, real *, integer * ), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); static integer iq2; #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] /* -- LAPACK routine (instrumented to count operations, version 3.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University June 30, 1999 Common block to return operation count and iteration count ITCNT is unchanged, OPS is only incremented Purpose ======= SLAED3 finds the roots of the secular equation, as defined by the values in D, W, and RHO, between 1 and K. It makes the appropriate calls to SLAED4 and then updates the eigenvectors by multiplying the matrix of eigenvectors of the pair of eigensystems being combined by the matrix of eigenvectors of the K-by-K system which is solved here. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= K (input) INTEGER The number of terms in the rational function to be solved by SLAED4. K >= 0. N (input) INTEGER The number of rows and columns in the Q matrix. N >= K (deflation may result in N>K). N1 (input) INTEGER The location of the last eigenvalue in the leading submatrix. min(1,N) <= N1 <= N/2. D (output) REAL array, dimension (N) D(I) contains the updated eigenvalues for 1 <= I <= K. Q (output) REAL array, dimension (LDQ,N) Initially the first K columns are used as workspace. On output the columns 1 to K contain the updated eigenvectors. LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max(1,N). RHO (input) REAL The value of the parameter in the rank one update equation. RHO >= 0 required. DLAMDA (input/output) REAL array, dimension (K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. May be changed on output by having lowest order bit set to zero on Cray X-MP, Cray Y-MP, Cray-2, or Cray C-90, as described above. Q2 (input) REAL array, dimension (LDQ2, N) The first K columns of this matrix contain the non-deflated eigenvectors for the split problem. INDX (input) INTEGER array, dimension (N) The permutation used to arrange the columns of the deflated Q matrix into three groups (see SLAED2). 
The rows of the eigenvectors found by SLAED4 must be likewise permuted before the matrix multiply can take place. CTOT (input) INTEGER array, dimension (4) A count of the total number of the various types of columns in Q, as described in INDX. The fourth column type is any column which has been deflated. W (input/output) REAL array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating vector. Destroyed on output. S (workspace) REAL array, dimension (N1 + 1)*K Will contain the eigenvectors of the repaired matrix which will be multiplied by the previously accumulated eigenvectors to update the system. LDS (input) INTEGER The leading dimension of S. LDS >= max(1,K). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an eigenvalue did not converge Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified by Francoise Tisseur, University of Tennessee. ===================================================================== Test the input parameters. Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --dlamda; --q2; --indx; --ctot; --w; --s; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*n < *k) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED3", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can be computed with high relative accuracy (barring over/underflow). This is a problem on machines without a guard digit in add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), which on any of these machines zeros out the bottommost bit of DLAMDA(I) if it is 1; this makes the subsequent subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation occurs. On binary machines with a guard digit (almost all machines) it does not change DLAMDA(I) at all. On hexadecimal and decimal machines with a guard digit, it slightly changes the bottommost bits of DLAMDA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute 2*DLAMBDA(I) to prevent optimizing compilers from eliminating this code. */ latime_1.ops += *n << 1; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *k; for (j = 1; j <= i__1; ++j) { slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1) { goto L110; } if (*k == 2) { i__1 = *k; for (j = 1; j <= i__1; ++j) { w[1] = q_ref(1, j); w[2] = q_ref(2, j); ii = indx[1]; q_ref(1, j) = w[ii]; ii = indx[2]; q_ref(2, j) = w[ii]; /* L30: */ } goto L110; } /* Compute updated W. 
*/ scopy_(k, &w[1], &c__1, &s[1], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); latime_1.ops += *k * 3 * (*k - 1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L40: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L50: */ } /* L60: */ } latime_1.ops += *k; i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = sqrt(-w[i__]); w[i__] = r_sign(&r__1, &s[i__]); /* L70: */ } /* Compute eigenvectors of the modified rank-1 modification. */ latime_1.ops += (*k << 2) * *k; i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__] = w[i__] / q_ref(i__, j); /* L80: */ } temp = snrm2_(k, &s[1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { ii = indx[i__]; q_ref(i__, j) = s[ii] / temp; /* L90: */ } /* L100: */ } /* Compute the updated eigenvectors. */ L110: n2 = *n - *n1; n12 = ctot[1] + ctot[2]; n23 = ctot[2] + ctot[3]; slacpy_("A", &n23, k, &q_ref(ctot[1] + 1, 1), ldq, &s[1], &n23) ; iq2 = *n1 * n12 + 1; if (n23 != 0) { latime_1.ops += (real) n2 * 2 * *k * n23; sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & c_b23, &q_ref(*n1 + 1, 1), ldq); } else { slaset_("A", &n2, k, &c_b23, &c_b23, &q_ref(*n1 + 1, 1), ldq); } slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); if (n12 != 0) { latime_1.ops += (real) (*n1) * 2 * *k * n12; sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq); } else { slaset_("A", n1, k, &c_b23, &c_b23, &q_ref(1, 1), ldq); } L120: return 0; /* End of SLAED3 */ } /* slaed3_ */
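/* Illustrative sketch (not part of LAPACK): the Loewner-type formula behind
   the "Compute updated W" loop of SLAED3 above.  Given the old poles DLAMDA,
   it rebuilds |W(I)|**2 from the gaps DLAMDA(I) - D(J) (which the real code
   reads back from Q(I,J) after SLAED4) and restores the sign of the original
   deflation-adjusted Z, so the eigenvectors computed from W come out
   numerically orthogonal.  The flat row-major delta[] layout and all names
   here are hypothetical. */
#include <math.h>
static void rebuild_weights(int k, const float *dlamda, const float *delta,
                            const float *zsign, float *w)
{
    int i, j;
    for (i = 0; i < k; ++i) {
        float prod = delta[i * k + i];           /* DLAMDA(i) - D(i) */
        for (j = 0; j < k; ++j)
            if (j != i)
                prod *= delta[i * k + j] / (dlamda[i] - dlamda[j]);
        /* prod equals -|w_i|**2; take the square root and reapply the sign
           of the original z component */
        w[i] = copysignf(sqrtf(-prod), zsign[i]);
    }
}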
/* Subroutine */ int slals0_(integer *icompq, integer *nl, integer *nr, integer *sqre, integer *nrhs, real *b, integer *ldb, real *bx, integer *ldbx, integer *perm, integer *givptr, integer *givcol, integer *ldgcol, real *givnum, integer *ldgnum, real *poles, real * difl, real *difr, real *z__, integer *k, real *c__, real *s, real * work, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University December 1, 1999 Purpose ======= SLALS0 applies back the multiplying factors of either the left or the right singular vector matrix of a diagonal matrix appended by a row to the right hand side matrix B in solving the least squares problem using the divide-and-conquer SVD approach. For the left singular vector matrix, three types of orthogonal matrices are involved: (1L) Givens rotations: the number of such rotations is GIVPTR; the pairs of columns/rows they were applied to are stored in GIVCOL; and the C- and S-values of these rotations are stored in GIVNUM. (2L) Permutation. The (NL+1)-st row of B is to be moved to the first row, and for J=2:N, PERM(J)-th row of B is to be moved to the J-th row. (3L) The left singular vector matrix of the remaining matrix. For the right singular vector matrix, four types of orthogonal matrices are involved: (1R) The right singular vector matrix of the remaining matrix. (2R) If SQRE = 1, one extra Givens rotation to generate the right null space. (3R) The inverse transformation of (2L). (4R) The inverse transformation of (1L). Arguments ========= ICOMPQ (input) INTEGER Specifies whether singular vectors are to be computed in factored form: = 0: Left singular vector matrix. = 1: Right singular vector matrix. NL (input) INTEGER The row dimension of the upper block. NL >= 1. NR (input) INTEGER The row dimension of the lower block. NR >= 1. SQRE (input) INTEGER = 0: the lower block is an NR-by-NR square matrix. = 1: the lower block is an NR-by-(NR+1) rectangular matrix. The bidiagonal matrix has row dimension N = NL + NR + 1, and column dimension M = N + SQRE. NRHS (input) INTEGER The number of columns of B and BX. NRHS must be at least 1. B (input/output) REAL array, dimension ( LDB, NRHS ) On input, B contains the right hand sides of the least squares problem in rows 1 through M. On output, B contains the solution X in rows 1 through N. LDB (input) INTEGER The leading dimension of B. LDB must be at least max(1,MAX( M, N ) ). BX (workspace) REAL array, dimension ( LDBX, NRHS ) LDBX (input) INTEGER The leading dimension of BX. PERM (input) INTEGER array, dimension ( N ) The permutations (from deflation and sorting) applied to the two blocks. GIVPTR (input) INTEGER The number of Givens rotations which took place in this subproblem. GIVCOL (input) INTEGER array, dimension ( LDGCOL, 2 ) Each pair of numbers indicates a pair of rows/columns involved in a Givens rotation. LDGCOL (input) INTEGER The leading dimension of GIVCOL, must be at least N. GIVNUM (input) REAL array, dimension ( LDGNUM, 2 ) Each number indicates the C or S value used in the corresponding Givens rotation. LDGNUM (input) INTEGER The leading dimension of arrays DIFR, POLES and GIVNUM, must be at least K. POLES (input) REAL array, dimension ( LDGNUM, 2 ) On entry, POLES(1:K, 1) contains the new singular values obtained from solving the secular equation, and POLES(1:K, 2) is an array containing the poles in the secular equation. DIFL (input) REAL array, dimension ( K ). 
On entry, DIFL(I) is the distance between I-th updated (undeflated) singular value and the I-th (undeflated) old singular value. DIFR (input) REAL array, dimension ( LDGNUM, 2 ). On entry, DIFR(I, 1) contains the distances between I-th updated (undeflated) singular value and the I+1-th (undeflated) old singular value. And DIFR(I, 2) is the normalizing factor for the I-th right singular vector. Z (input) REAL array, dimension ( K ) Contain the components of the deflation-adjusted updating row vector. K (input) INTEGER Contains the dimension of the non-deflated matrix, This is the order of the related secular equation. 1 <= K <=N. C (input) REAL C contains garbage if SQRE =0 and the C-value of a Givens rotation related to the right null space if SQRE = 1. S (input) REAL S contains garbage if SQRE =0 and the S-value of a Givens rotation related to the right null space if SQRE = 1. WORK (workspace) REAL array, dimension ( K ) INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. Further Details =============== Based on contributions by Ming Gu and Ren-Cang Li, Computer Science Division, University of California at Berkeley, USA Osni Marques, LBNL/NERSC, USA ===================================================================== Test the input parameters. Parameter adjustments */ /* Table of constant values */ static real c_b5 = -1.f; static integer c__1 = 1; static real c_b11 = 1.f; static real c_b13 = 0.f; static integer c__0 = 0; /* System generated locals */ integer givcol_dim1, givcol_offset, b_dim1, b_offset, bx_dim1, bx_offset, difr_dim1, difr_offset, givnum_dim1, givnum_offset, poles_dim1, poles_offset, i__1, i__2; real r__1; /* Local variables */ static real temp; extern /* Subroutine */ int srot_(integer *, real *, integer *, real *, integer *, real *, real *); extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j, m, n; static real diflj, difrj, dsigj; extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_( integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *); static real dj; extern /* Subroutine */ int xerbla_(char *, integer *); static real dsigjp; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); static integer nlp1; #define difr_ref(a_1,a_2) difr[(a_2)*difr_dim1 + a_1] #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1] #define poles_ref(a_1,a_2) poles[(a_2)*poles_dim1 + a_1] #define bx_ref(a_1,a_2) bx[(a_2)*bx_dim1 + a_1] #define givcol_ref(a_1,a_2) givcol[(a_2)*givcol_dim1 + a_1] #define givnum_ref(a_1,a_2) givnum[(a_2)*givnum_dim1 + a_1] b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; bx_dim1 = *ldbx; bx_offset = 1 + bx_dim1 * 1; bx -= bx_offset; --perm; givcol_dim1 = *ldgcol; givcol_offset = 1 + givcol_dim1 * 1; givcol -= givcol_offset; difr_dim1 = *ldgnum; difr_offset = 1 + difr_dim1 * 1; difr -= difr_offset; poles_dim1 = *ldgnum; poles_offset = 1 + poles_dim1 * 1; poles -= poles_offset; givnum_dim1 = *ldgnum; givnum_offset = 1 + givnum_dim1 * 1; givnum -= givnum_offset; --difl; --z__; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*nl < 1) { *info = -2; } else if (*nr < 1) { *info = -3; } else if (*sqre < 0 || *sqre > 1) { 
*info = -4; } n = *nl + *nr + 1; if (*nrhs < 1) { *info = -5; } else if (*ldb < n) { *info = -7; } else if (*ldbx < n) { *info = -9; } else if (*givptr < 0) { *info = -11; } else if (*ldgcol < n) { *info = -13; } else if (*ldgnum < n) { *info = -15; } else if (*k < 1) { *info = -20; } if (*info != 0) { i__1 = -(*info); xerbla_("SLALS0", &i__1); return 0; } m = n + *sqre; nlp1 = *nl + 1; if (*icompq == 0) { /* Apply back orthogonal transformations from the left. Step (1L): apply back the Givens rotations performed. */ i__1 = *givptr; for (i__ = 1; i__ <= i__1; ++i__) { srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &givnum_ref(i__, 1) ); /* L10: */ } /* Step (2L): permute rows of B. */ scopy_(nrhs, &b_ref(nlp1, 1), ldb, &bx_ref(1, 1), ldbx); i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &b_ref(perm[i__], 1), ldb, &bx_ref(i__, 1), ldbx); /* L20: */ } /* Step (3L): apply the inverse of the left singular vector matrix to BX. */ if (*k == 1) { scopy_(nrhs, &bx[bx_offset], ldbx, &b[b_offset], ldb); if (z__[1] < 0.f) { sscal_(nrhs, &c_b5, &b[b_offset], ldb); } } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = poles_ref(j, 1); dsigj = -poles_ref(j, 2); if (j < *k) { difrj = -difr_ref(j, 1); dsigjp = -poles_ref(j + 1, 2); } if (z__[j] == 0.f || poles_ref(j, 2) == 0.f) { work[j] = 0.f; } else { work[j] = -poles_ref(j, 2) * z__[j] / diflj / (poles_ref( j, 2) + dj); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigj) - diflj) / ( poles_ref(i__, 2) + dj); } /* L30: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[i__] == 0.f || poles_ref(i__, 2) == 0.f) { work[i__] = 0.f; } else { work[i__] = poles_ref(i__, 2) * z__[i__] / (slamc3_(& poles_ref(i__, 2), &dsigjp) + difrj) / ( poles_ref(i__, 2) + dj); } /* L40: */ } work[1] = -1.f; temp = snrm2_(k, &work[1], &c__1); sgemv_("T", k, nrhs, &c_b11, &bx[bx_offset], ldbx, &work[1], & c__1, &c_b13, &b_ref(j, 1), ldb); slascl_("G", &c__0, &c__0, &temp, &c_b11, &c__1, nrhs, &b_ref( j, 1), ldb, info); /* L50: */ } } /* Move the deflated rows of BX to B also. */ if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &bx_ref(*k + 1, 1), ldbx, &b_ref(*k + 1, 1), ldb); } } else { /* Apply back the right orthogonal transformations. Step (1R): apply back the new right singular vector matrix to B. */ if (*k == 1) { scopy_(nrhs, &b[b_offset], ldb, &bx[bx_offset], ldbx); } else { i__1 = *k; for (j = 1; j <= i__1; ++j) { dsigj = poles_ref(j, 2); if (z__[j] == 0.f) { work[j] = 0.f; } else { work[j] = -z__[j] / difl[j] / (dsigj + poles_ref(j, 1)) / difr_ref(j, 2); } i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles_ref(i__ + 1, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difr_ref(i__, 1)) / (dsigj + poles_ref(i__, 1) ) / difr_ref(i__, 2); } /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { if (z__[j] == 0.f) { work[i__] = 0.f; } else { r__1 = -poles_ref(i__, 2); work[i__] = z__[j] / (slamc3_(&dsigj, &r__1) - difl[ i__]) / (dsigj + poles_ref(i__, 1)) / difr_ref(i__, 2); } /* L70: */ } sgemv_("T", k, nrhs, &c_b11, &b[b_offset], ldb, &work[1], & c__1, &c_b13, &bx_ref(j, 1), ldbx); /* L80: */ } } /* Step (2R): if SQRE = 1, apply back the rotation that is related to the right null space of the subproblem. 
*/ if (*sqre == 1) { scopy_(nrhs, &b_ref(m, 1), ldb, &bx_ref(m, 1), ldbx); srot_(nrhs, &bx_ref(1, 1), ldbx, &bx_ref(m, 1), ldbx, c__, s); } if (*k < max(m,n)) { i__1 = n - *k; slacpy_("A", &i__1, nrhs, &b_ref(*k + 1, 1), ldb, &bx_ref(*k + 1, 1), ldbx); } /* Step (3R): permute rows of B. */ scopy_(nrhs, &bx_ref(1, 1), ldbx, &b_ref(nlp1, 1), ldb); if (*sqre == 1) { scopy_(nrhs, &bx_ref(m, 1), ldbx, &b_ref(m, 1), ldb); } i__1 = n; for (i__ = 2; i__ <= i__1; ++i__) { scopy_(nrhs, &bx_ref(i__, 1), ldbx, &b_ref(perm[i__], 1), ldb); /* L90: */ } /* Step (4R): apply back the Givens rotations performed. */ for (i__ = *givptr; i__ >= 1; --i__) { r__1 = -givnum_ref(i__, 1); srot_(nrhs, &b_ref(givcol_ref(i__, 2), 1), ldb, &b_ref(givcol_ref( i__, 1), 1), ldb, &givnum_ref(i__, 2), &r__1); /* L100: */ } } return 0; /* End of SLALS0 */ } /* slals0_ */
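/*
   Illustrative sketch (not part of LAPACK): steps (1L) and (4R) above
   replay a recorded list of Givens rotations on pairs of rows of B, the
   second time in reverse order with one sine negated.  The hypothetical
   helper below applies such a list to a row-major matrix with ncol
   columns; rows[i][0]/rows[i][1] play the role of the (0-based) GIVCOL
   pair and c[i]/s[i] the role of the GIVNUM pair, using the BLAS SROT
   update x <- c*x + s*y, y <- c*y - s*x.
*/
static void apply_givens_rows_demo(int ngiv, const int (*rows)[2],
                                   const float *c, const float *s,
                                   float *b, int ncol)
{
    int i, j;
    for (i = 0; i < ngiv; ++i) {
        float *x = b + (long)rows[i][0] * ncol;   /* first row of the pair  */
        float *y = b + (long)rows[i][1] * ncol;   /* second row of the pair */
        for (j = 0; j < ncol; ++j) {
            float xj = x[j], yj = y[j];
            x[j] = c[i] * xj + s[i] * yj;
            y[j] = c[i] * yj - s[i] * xj;
        }
    }
}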
/* Subroutine */ int slasd8_(integer *icompq, integer *k, real *d__, real * z__, real *vf, real *vl, real *difl, real *difr, integer *lddifr, real *dsigma, real *work, integer *info) { /* System generated locals */ integer difr_dim1, difr_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ integer i__, j; real dj, rho; integer iwk1, iwk2, iwk3; real temp; extern doublereal sdot_(integer *, real *, integer *, real *, integer *); integer iwk2i, iwk3i; extern doublereal snrm2_(integer *, real *, integer *); real diflj, difrj, dsigj; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, real *, real *, real *, real *, integer *), xerbla_(char *, integer *); real dsigjp; extern /* Subroutine */ int slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* October 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLASD8 finds the square roots of the roots of the secular equation, */ /* as defined by the values in DSIGMA and Z. It makes the appropriate */ /* calls to SLASD4, and stores, for each element in D, the distance */ /* to its two nearest poles (elements in DSIGMA). It also updates */ /* the arrays VF and VL, the first and last components of all the */ /* right singular vectors of the original bidiagonal matrix. */ /* SLASD8 is called from SLASD6. */ /* Arguments */ /* ========= */ /* ICOMPQ (input) INTEGER */ /* Specifies whether singular vectors are to be computed in */ /* factored form in the calling routine: */ /* = 0: Compute singular values only. */ /* = 1: Compute singular vectors in factored form as well. */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved */ /* by SLASD4. K >= 1. */ /* D (output) REAL array, dimension ( K ) */ /* On output, D contains the updated singular values. */ /* Z (input/output) REAL array, dimension ( K ) */ /* On entry, the first K elements of this array contain the */ /* components of the deflation-adjusted updating row vector. */ /* On exit, Z is updated. */ /* VF (input/output) REAL array, dimension ( K ) */ /* On entry, VF contains information passed through DBEDE8. */ /* On exit, VF contains the first K components of the first */ /* components of all right singular vectors of the bidiagonal */ /* matrix. */ /* VL (input/output) REAL array, dimension ( K ) */ /* On entry, VL contains information passed through DBEDE8. */ /* On exit, VL contains the first K components of the last */ /* components of all right singular vectors of the bidiagonal */ /* matrix. */ /* DIFL (output) REAL array, dimension ( K ) */ /* On exit, DIFL(I) = D(I) - DSIGMA(I). */ /* DIFR (output) REAL array, */ /* dimension ( LDDIFR, 2 ) if ICOMPQ = 1 and */ /* dimension ( K ) if ICOMPQ = 0. */ /* On exit, DIFR(I,1) = D(I) - DSIGMA(I+1), DIFR(K,1) is not */ /* defined and will not be referenced. */ /* If ICOMPQ = 1, DIFR(1:K,2) is an array containing the */ /* normalizing factors for the right singular vector matrix. */ /* LDDIFR (input) INTEGER */ /* The leading dimension of DIFR, must be at least K. 
*/ /* DSIGMA (input/output) REAL array, dimension ( K ) */ /* On entry, the first K elements of this array contain the old */ /* roots of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* On exit, the elements of DSIGMA may be very slightly altered */ /* in value. */ /* WORK (workspace) REAL array, dimension at least 3 * K */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; --z__; --vf; --vl; --difl; difr_dim1 = *lddifr; difr_offset = 1 + difr_dim1; difr -= difr_offset; --dsigma; --work; /* Function Body */ *info = 0; if (*icompq < 0 || *icompq > 1) { *info = -1; } else if (*k < 1) { *info = -2; } else if (*lddifr < *k) { *info = -9; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD8", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { d__[1] = dabs(z__[1]); difl[1] = d__[1]; if (*icompq == 1) { difl[2] = 1.f; difr[(difr_dim1 << 1) + 1] = 1.f; } return 0; } /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DSIGMA(I) if it is 1; this makes the subsequent */ /* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DSIGMA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DSIGMA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L10: */ } /* Book keeping. */ iwk1 = 1; iwk2 = iwk1 + *k; iwk3 = iwk2 + *k; iwk2i = iwk2 - 1; iwk3i = iwk3 - 1; /* Normalize Z. */ rho = snrm2_(k, &z__[1], &c__1); slascl_("G", &c__0, &c__0, &rho, &c_b8, k, &c__1, &z__[1], k, info); rho *= rho; /* Initialize WORK(IWK3). */ slaset_("A", k, &c__1, &c_b8, &c_b8, &work[iwk3], k); /* Compute the updated singular values, the arrays DIFL, DIFR, */ /* and the updated Z. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { slasd4_(k, &j, &dsigma[1], &z__[1], &work[iwk1], &rho, &d__[j], &work[ iwk2], info); /* If the root finder fails, the computation is terminated. 
*/ if (*info != 0) { return 0; } work[iwk3i + j] = work[iwk3i + j] * work[j] * work[iwk2i + j]; difl[j] = -work[j]; difr[j + difr_dim1] = -work[j + 1]; i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L20: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[iwk3i + i__] = work[iwk3i + i__] * work[i__] * work[iwk2i + i__] / (dsigma[i__] - dsigma[j]) / (dsigma[i__] + dsigma[ j]); /* L30: */ } /* L40: */ } /* Compute updated Z. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__2 = sqrt((r__1 = work[iwk3i + i__], dabs(r__1))); z__[i__] = r_sign(&r__2, &z__[i__]); /* L50: */ } /* Update VF and VL. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { diflj = difl[j]; dj = d__[j]; dsigj = -dsigma[j]; if (j < *k) { difrj = -difr[j + difr_dim1]; dsigjp = -dsigma[j + 1]; } work[j] = -z__[j] / diflj / (dsigma[j] + dj); i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigj) - diflj) / ( dsigma[i__] + dj); /* L60: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { work[i__] = z__[i__] / (slamc3_(&dsigma[i__], &dsigjp) + difrj) / (dsigma[i__] + dj); /* L70: */ } temp = snrm2_(k, &work[1], &c__1); work[iwk2i + j] = sdot_(k, &work[1], &c__1, &vf[1], &c__1) / temp; work[iwk3i + j] = sdot_(k, &work[1], &c__1, &vl[1], &c__1) / temp; if (*icompq == 1) { difr[j + (difr_dim1 << 1)] = temp; } /* L80: */ } scopy_(k, &work[iwk2], &c__1, &vf[1], &c__1); scopy_(k, &work[iwk3], &c__1, &vl[1], &c__1); return 0; /* End of SLASD8 */ } /* slasd8_ */
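/*
   Illustrative sketch (not part of LAPACK): the loop near the start of
   SLASD8 above replaces each DSIGMA(I) by fl( 2*DSIGMA(I) ) - DSIGMA(I),
   routed through SLAMC3 so the compiler cannot fold the expression back
   to DSIGMA(I).  The hypothetical helper below achieves the same effect
   with a volatile temporary: on a binary machine with a guard digit it
   returns x unchanged (assuming 2*x does not overflow), which is all the
   trick needs to guarantee.
*/
static float force_store_demo(float x)
{
    volatile float twice = x + x;   /* forced out to memory, not kept in a register */
    return twice - x;               /* fl(2*x) - x: equals x when there is a guard digit */
}

/* Usage corresponding to the loop above (hypothetical):
       for (i = 0; i < k; ++i) dsigma[i] = force_store_demo(dsigma[i]);    */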
/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax) { /* System generated locals */ integer i__1; real r__1; /* Local variables */ integer i__; real y, z__; integer try__, lexp; real oldy; integer uexp, nbits; extern doublereal slamc3_(real *, real *); real recbas; integer exbits, expsum; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ /* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ /* approximately to a power of 2. It will fail on machines where this */ /* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ /* EMAX = 28718). It will also fail if the value supplied for EMIN is */ /* too large (i.e. too close to zero), probably with overflow. */ /* Arguments */ /* ========= */ /* BETA (input) INTEGER */ /* The base of floating-point arithmetic. */ /* P (input) INTEGER */ /* The number of base BETA digits in the mantissa of a */ /* floating-point value. */ /* EMIN (input) INTEGER */ /* The minimum exponent before (gradual) underflow. */ /* IEEE (input) LOGICAL */ /* A logical flag specifying whether or not the arithmetic */ /* system is thought to comply with the IEEE standard. */ /* EMAX (output) INTEGER */ /* The largest exponent before overflow */ /* RMAX (output) REAL */ /* The largest machine floating-point number. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* First compute LEXP and UEXP, two powers of 2 that bound */ /* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ /* approximately to the bound that is closest to abs(EMIN). */ /* (EMAX is the exponent of the required number RMAX). */ lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ /* than or equal to EMIN. EXBITS is the number of bits needed to */ /* store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to */ /* EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a */ /* floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a */ /* floating-point number, which is unlikely, or some bits are */ /* not used in the representation of numbers, which is possible, */ /* (e.g. Cray machines) or the mantissa has an implicit bit, */ /* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ /* most likely. We have to assume the last alternative. */ /* If this is true, then we need to reduce EMAX by one because */ /* there must be some way of representing zero in an implicit-bit */ /* system. On machines like Cray, we are reducing EMAX by one */ /* unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent */ /* for infinity and NaN. 
*/ --(*emax); } /* Now create RMAX, the largest machine number, which should */ /* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ /* First compute 1.0 - BETA**(-P), being careful that the */ /* result is less than 1.0 . */ recbas = 1.f / *beta; z__ = *beta - 1.f; y = 0.f; i__1 = *p; for (i__ = 1; i__ <= i__1; ++i__) { z__ *= recbas; if (y < 1.f) { oldy = y; } y = slamc3_(&y, &z__); /* L20: */ } if (y >= 1.f) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = y * *beta; y = slamc3_(&r__1, &c_b32); /* L30: */ } *rmax = y; return 0; /* End of SLAMC5 */ } /* slamc5_ */
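/*
   Illustrative sketch (not part of LAPACK): the code above assembles
   RMAX = ( 1 - BETA**(-P) ) * BETA**EMAX by first summing the P terms
   (BETA-1)*BETA**(-i) and then multiplying by BETA, EMAX times, with every
   intermediate routed through SLAMC3.  The hypothetical helper below just
   evaluates the same formula directly in double precision; for IEEE single
   precision (beta = 2, p = 24, emax = 128) it reproduces FLT_MAX,
   about 3.4028235e+38.
*/
#include <math.h>

static double rmax_model_demo(int beta, int p, int emax)
{
    /* (1 - beta^(-p)) * beta^emax, evaluated in double so the sketch
       itself does not have to worry about overflow */
    return (1.0 - pow((double)beta, -(double)p)) * pow((double)beta, (double)emax);
}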
/* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) { /* System generated locals */ integer i__1; real r__1; /* Local variables */ real a; integer i__; real b1, b2, c1, c2, d1, d2, one, zero, rbase; extern doublereal slamc3_(real *, real *); /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC4 is a service routine for SLAMC2. */ /* Arguments */ /* ========= */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow, computed by */ /* setting A = START and dividing by BASE until the previous A */ /* can not be recovered. */ /* START (input) REAL */ /* The starting point for determining EMIN. */ /* BASE (input) INTEGER */ /* The base of the machine. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. Executable Statements .. */ a = *start; one = 1.f; rbase = one / *base; zero = 0.f; *emin = 1; r__1 = a * rbase; b1 = slamc3_(&r__1, &zero); c1 = a; c2 = a; d1 = a; d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */ /* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ L10: if (c1 == a && c2 == a && d1 == a && d2 == a) { --(*emin); a = b1; r__1 = a / *base; b1 = slamc3_(&r__1, &zero); r__1 = b1 * *base; c1 = slamc3_(&r__1, &zero); d1 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d1 += b1; /* L20: */ } r__1 = a * rbase; b2 = slamc3_(&r__1, &zero); r__1 = b2 / rbase; c2 = slamc3_(&r__1, &zero); d2 = zero; i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { d2 += b2; /* L30: */ } goto L10; } /* + END WHILE */ return 0; /* End of SLAMC4 */ } /* slamc4_ */
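/*
   Illustrative sketch (not part of LAPACK): SLAMC4 above keeps dividing A
   by BASE and decrements EMIN for as long as the previous A can still be
   recovered, both by multiplying back and by adding the quotient to itself
   BASE times.  The hypothetical helper below is a stripped-down version
   that keeps only the multiply-back test and uses volatile temporaries
   where the original relies on SLAMC3 to force stores.  With gradual
   underflow it walks into the subnormal range, so for IEEE single
   precision and START = 1 it returns about -148 rather than the
   normalized EMIN of -125; SLAMC2 compensates by probing with several
   starting values and comparing the results.  START must be nonzero.
*/
static int emin_probe_demo(float start, int base)
{
    int emin = 1;
    float a = start;
    for (;;) {
        volatile float b = a / base;   /* next, smaller candidate           */
        volatile float c = b * base;   /* try to recover the previous value */
        if (c != a)
            break;                     /* recovery failed: stop             */
        --emin;
        a = b;
    }
    return emin;
}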
/* Subroutine */ int slamc2_(integer *beta, integer *t, logical *rnd, real * eps, integer *emin, real *rmin, integer *emax, real *rmax) { /* Initialized data */ static logical first = TRUE_; static logical iwarn = FALSE_; /* Format strings */ static char fmt_9999[] = "(//\002 WARNING. The value EMIN may be incorre" "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" " the IF block as marked within the code of routine\002,\002 SLAM" "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; /* System generated locals */ integer i__1; real r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double pow_ri(real *, integer *); //integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); /* Local variables */ real a, b, c__; integer i__; static integer lt; real one, two; logical ieee; real half; logical lrnd; static real leps; real zero; static integer lbeta; real rbase; static integer lemin, lemax; integer gnmin; real small; integer gpmin; real third; static real lrmin, lrmax; real sixth; logical lieee1; extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, logical *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slamc4_(integer *, real *, integer *), slamc5_(integer *, integer *, integer *, logical *, integer *, real *); integer ngnmin, ngpmin; /* Fortran I/O blocks */ static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; /* -- LAPACK auxiliary routine (version 3.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAMC2 determines the machine parameters specified in its argument */ /* list. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* EPS (output) REAL */ /* The smallest positive number such that */ /* fl( 1.0 - EPS ) .LT. 1.0, */ /* where fl denotes the computed value. */ /* EMIN (output) INTEGER */ /* The minimum exponent before (gradual) underflow occurs. */ /* RMIN (output) REAL */ /* The smallest normalized number for the machine, given by */ /* BASE**( EMIN - 1 ), where BASE is the floating point value */ /* of BETA. */ /* EMAX (output) INTEGER */ /* The maximum exponent before overflow occurs. */ /* RMAX (output) REAL */ /* The largest positive number for the machine, given by */ /* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ /* value of BETA. */ /* Further Details */ /* =============== */ /* The computation of EPS is based on a routine PARANOIA by */ /* W. Kahan of the University of California at Berkeley. */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Save statement .. */ /* .. */ /* .. Data statements .. */ /* .. */ /* .. Executable Statements .. */ if (first) { zero = 0.f; one = 1.f; two = 2.f; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ /* BETA, T, RND, EPS, EMIN and RMIN. 
*/ /* Throughout this routine we use the function SLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ slamc1_(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (real) lbeta; i__1 = -lt; a = pow_ri(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct EPS. */ b = two / 3; half = one / 2; r__1 = -half; sixth = slamc3_(&b, &r__1); third = slamc3_(&sixth, &sixth); r__1 = -half; b = slamc3_(&third, &r__1); b = slamc3_(&b, &sixth); b = dabs(b); if (b < leps) { b = leps; } leps = 1.f; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; r__1 = half * leps; /* Computing 5th power */ r__3 = two, r__4 = r__3, r__3 *= r__3; /* Computing 2nd power */ r__5 = leps; r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); c__ = slamc3_(&r__1, &r__2); r__1 = -c__; c__ = slamc3_(&half, &r__1); b = slamc3_(&half, &c__); r__1 = -b; c__ = slamc3_(&half, &r__1); b = slamc3_(&half, &c__); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. */ /* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ /* Keep dividing A by BETA until (gradual) underflow occurs. This */ /* is detected when we cannot recover the previous A. */ rbase = one / lbeta; small = one; for (i__ = 1; i__ <= 3; ++i__) { r__1 = small * rbase; small = slamc3_(&r__1, &zero); /* L20: */ } a = slamc3_(&one, &small); slamc4_(&ngpmin, &one, &lbeta); r__1 = -one; slamc4_(&ngnmin, &r__1, &lbeta); slamc4_(&gpmin, &a, &lbeta); r__1 = -a; slamc4_(&gnmin, &r__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual underflow; */ /* e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual underflow; */ /* e.g., IEEE standard followers ) */ } else { lemin = min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { lemin = max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow; */ /* e.g., CYBER 205 ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - min(ngpmin,ngnmin) == 3) { lemin = max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflow; */ /* no known machine ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); lemin = min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } first = FALSE_; /* ** */ /* Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; printf("\n\n WARNING. 
The value EMIN may be incorrect:- ");
	    printf("EMIN = %8i\n", (int)lemin);
	    printf("If, after inspection, the value EMIN looks acceptable, please\n");
	    printf("comment out the IF block as marked within the code of routine\n");
	    printf("SLAMC2, otherwise supply EMIN explicitly.\n");
/*	    s_wsfe(&io___58);
	    do_fio(&c__1, (char *)&lemin, (ftnlen)sizeof(integer));
	    e_wsfe(); */
	}
/* ** */

/*        Assume IEEE arithmetic if we found denormalised numbers above, */
/*        or if arithmetic seems to round in the IEEE style, determined */
/*        in routine SLAMC1. A true IEEE machine should have both things */
/*        true; however, faulty machines may have one or the other. */

	ieee = ieee || lieee1;

/*        Compute RMIN by successive division by BETA. We could compute */
/*        RMIN as BASE**( EMIN - 1 ), but some machines underflow during */
/*        this computation. */

	lrmin = 1.f;
	i__1 = 1 - lemin;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    r__1 = lrmin * rbase;
	    lrmin = slamc3_(&r__1, &zero);
/* L30: */
	}

/*        Finally, call SLAMC5 to compute EMAX and RMAX. */

	slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
    }

    *beta = lbeta;
    *t = lt;
    *rnd = lrnd;
    *eps = leps;
    *emin = lemin;
    *rmin = lrmin;
    *emax = lemax;
    *rmax = lrmax;

    return 0;

/*     End of SLAMC2 */

} /* slamc2_ */
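/*
   Illustrative sketch (not part of LAPACK): on an IEEE machine the values
   SLAMC2 derives line up with the constants <float.h> publishes for float,
   which makes a convenient cross-check.  BETA, T, EMIN, RMIN, EMAX and RMAX
   correspond to FLT_RADIX, FLT_MANT_DIG, FLT_MIN_EXP, FLT_MIN, FLT_MAX_EXP
   and FLT_MAX, while EPS as defined above is on the order of FLT_EPSILON
   (half of it when addition rounds).  The helper name below is
   hypothetical.
*/
#include <stdio.h>
#include <float.h>

static void print_float_model_demo(void)
{
    printf("beta = %d   t = %d\n", (int) FLT_RADIX, (int) FLT_MANT_DIG);
    printf("emin = %d   rmin = %g\n", (int) FLT_MIN_EXP, (double) FLT_MIN);
    printf("emax = %d   rmax = %g\n", (int) FLT_MAX_EXP, (double) FLT_MAX);
    printf("eps (one ulp of 1.0) = %g\n", (double) FLT_EPSILON);
}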
/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info) { /* System generated locals */ integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ integer i__, j; real temp; extern doublereal snrm2_(integer *, real *, integer *); extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slaed4_(integer *, integer *, real *, real *, real *, real *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int xerbla_(char *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAED9 finds the roots of the secular equation, as defined by the */ /* values in D, Z, and RHO, between KSTART and KSTOP. It makes the */ /* appropriate calls to SLAED4 and then stores the new matrix of */ /* eigenvectors for use in calculating the next level of Z vectors. */ /* Arguments */ /* ========= */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved by */ /* SLAED4. K >= 0. */ /* KSTART (input) INTEGER */ /* KSTOP (input) INTEGER */ /* The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP */ /* are to be computed. 1 <= KSTART <= KSTOP <= K. */ /* N (input) INTEGER */ /* The number of rows and columns in the Q matrix. */ /* N >= K (delation may result in N > K). */ /* D (output) REAL array, dimension (N) */ /* D(I) contains the updated eigenvalues */ /* for KSTART <= I <= KSTOP. */ /* Q (workspace) REAL array, dimension (LDQ,N) */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max( 1, N ). */ /* RHO (input) REAL */ /* The value of the parameter in the rank one update equation. */ /* RHO >= 0 required. */ /* DLAMDA (input) REAL array, dimension (K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* W (input) REAL array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating vector. */ /* S (output) REAL array, dimension (LDS, K) */ /* Will contain the eigenvectors of the repaired matrix which */ /* will be stored for subsequent Z vector calculation and */ /* multiplied by the previously accumulated eigenvectors */ /* to update the system. */ /* LDS (input) INTEGER */ /* The leading dimension of S. LDS >= max( 1, K ). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* ===================================================================== */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dlamda; --w; s_dim1 = *lds; s_offset = 1 + s_dim1; s -= s_offset; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*kstart < 1 || *kstart > max(1,*k)) { *info = -2; } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { *info = -3; } else if (*n < *k) { *info = -4; } else if (*ldq < max(1,*k)) { *info = -7; } else if (*lds < max(1,*k)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED9", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DLAMDA(I) if it is 1; this makes the subsequent */ /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DLAMDA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DLAMDA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *kstop; for (j = *kstart; j <= i__1; ++j) { slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1 || *k == 2) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *k; for (j = 1; j <= i__2; ++j) { s[j + i__ * s_dim1] = q[j + i__ * q_dim1]; /* L30: */ } /* L40: */ } goto L120; } /* Compute updated W. */ scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L50: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L60: */ } /* L70: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = sqrt(-w[i__]); w[i__] = r_sign(&r__1, &s[i__ + s_dim1]); /* L80: */ } /* Compute eigenvectors of the modified rank-1 modification. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { q[i__ + j * q_dim1] = w[i__] / q[i__ + j * q_dim1]; /* L90: */ } temp = snrm2_(k, &q[j * q_dim1 + 1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__ + j * s_dim1] = q[i__ + j * q_dim1] / temp; /* L100: */ } /* L110: */ } L120: return 0; /* End of SLAED9 */ } /* slaed9_ */
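/*
   Illustrative sketch (not part of LAPACK): the final loop above rescales
   each computed eigenvector column to unit Euclidean length, using SNRM2
   for the norm and then dividing through.  The hypothetical helper below
   redoes that step for one contiguously stored column; unlike SNRM2 it
   makes no attempt to avoid overflow or underflow while accumulating the
   sum of squares.
*/
#include <math.h>

static void normalize_column_demo(int k, float *col)
{
    double sumsq = 0.0;
    int i;
    for (i = 0; i < k; ++i)
        sumsq += (double) col[i] * (double) col[i];
    if (sumsq > 0.0) {
        float norm = (float) sqrt(sumsq);
        for (i = 0; i < k; ++i)
            col[i] /= norm;            /* column now has unit 2-norm */
    }
}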
/* Subroutine */ int slamc5_(integer *beta, integer *p, integer *emin, logical *ieee, integer *emax, real *rmax) { /* -- LAPACK auxiliary routine (version 2.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University October 31, 1992 Purpose ======= SLAMC5 attempts to compute RMAX, the largest machine floating-point number, without overflow. It assumes that EMAX + abs(EMIN) sum approximately to a power of 2. It will fail on machines where this assumption does not hold, for example, the Cyber 205 (EMIN = -28625, EMAX = 28718). It will also fail if the value supplied for EMIN is too large (i.e. too close to zero), probably with overflow. Arguments ========= BETA (input) INTEGER The base of floating-point arithmetic. P (input) INTEGER The number of base BETA digits in the mantissa of a floating-point value. EMIN (input) INTEGER The minimum exponent before (gradual) underflow. IEEE (input) LOGICAL A logical flag specifying whether or not the arithmetic system is thought to comply with the IEEE standard. EMAX (output) INTEGER The largest exponent before overflow RMAX (output) REAL The largest machine floating-point number. ===================================================================== First compute LEXP and UEXP, two powers of 2 that bound abs(EMIN). We then assume that EMAX + abs(EMIN) will sum approximately to the bound that is closest to abs(EMIN). (EMAX is the exponent of the required number RMAX). */ /* Table of constant values */ static real c_b5 = 0.f; /* System generated locals */ integer i__1; real r__1; /* Local variables */ static integer lexp; static real oldy; static integer uexp, i; static real y, z; static integer nbits; extern doublereal slamc3_(real *, real *); static real recbas; static integer exbits, expsum, try__; lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater than or equal to EMIN. EXBITS is the number of bits needed to store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a floating-point number, which is unlikely, or some bits are not used in the representation of numbers, which is possible , (e.g. Cray machines) or the mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), which is perhaps the most likely. We have to assume the last alternative. If this is true, then we need to reduce EMAX by one because there must be some way of representing zero in an implicit-b it system. On machines like Cray, we are reducing EMAX by one unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should be equal to (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0 - BETA**(-P), being careful that the result is less than 1.0 . 
*/ recbas = 1.f / *beta; z = *beta - 1.f; y = 0.f; i__1 = *p; for (i = 1; i <= *p; ++i) { z *= recbas; if (y < 1.f) { oldy = y; } y = slamc3_(&y, &z); /* L20: */ } if (y >= 1.f) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i = 1; i <= *emax; ++i) { r__1 = y * *beta; y = slamc3_(&r__1, &c_b5); /* L30: */ } *rmax = y; return 0; /* End of SLAMC5 */ } /* slamc5_ */
/* Subroutine */ int slasd3_(integer *nl, integer *nr, integer *sqre, integer *k, real *d__, real *q, integer *ldq, real *dsigma, real *u, integer * ldu, real *u2, integer *ldu2, real *vt, integer *ldvt, real *vt2, integer *ldvt2, integer *idxc, integer *ctot, real *z__, integer * info) { /* System generated locals */ integer q_dim1, q_offset, u_dim1, u_offset, u2_dim1, u2_offset, vt_dim1, vt_offset, vt2_dim1, vt2_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ integer i__, j, m, n, jc; real rho; integer nlp1, nlp2, nrp1; real temp; extern doublereal snrm2_(integer *, real *, integer *); integer ctemp; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); integer ktemp; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int slasd4_(integer *, integer *, real *, real *, real *, real *, real *, real *, integer *), xerbla_(char *, integer *), slascl_(char *, integer *, integer *, real *, real *, integer *, integer *, real *, integer *, integer *), slacpy_(char *, integer *, integer *, real *, integer *, real *, integer *); /* -- LAPACK auxiliary routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLASD3 finds all the square roots of the roots of the secular */ /* equation, as defined by the values in D and Z. It makes the */ /* appropriate calls to SLASD4 and then updates the singular */ /* vectors by matrix multiplication. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* SLASD3 is called from SLASD1. */ /* Arguments */ /* ========= */ /* NL (input) INTEGER */ /* The row dimension of the upper block. NL >= 1. */ /* NR (input) INTEGER */ /* The row dimension of the lower block. NR >= 1. */ /* SQRE (input) INTEGER */ /* = 0: the lower block is an NR-by-NR square matrix. */ /* = 1: the lower block is an NR-by-(NR+1) rectangular matrix. */ /* The bidiagonal matrix has N = NL + NR + 1 rows and */ /* M = N + SQRE >= N columns. */ /* K (input) INTEGER */ /* The size of the secular equation, 1 =< K = < N. */ /* D (output) REAL array, dimension(K) */ /* On exit the square roots of the roots of the secular equation, */ /* in ascending order. */ /* Q (workspace) REAL array, */ /* dimension at least (LDQ,K). */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= K. */ /* DSIGMA (input/output) REAL array, dimension(K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. */ /* U (output) REAL array, dimension (LDU, N) */ /* The last N - K columns of this matrix contain the deflated */ /* left singular vectors. */ /* LDU (input) INTEGER */ /* The leading dimension of the array U. LDU >= N. 
*/ /* U2 (input) REAL array, dimension (LDU2, N) */ /* The first K columns of this matrix contain the non-deflated */ /* left singular vectors for the split problem. */ /* LDU2 (input) INTEGER */ /* The leading dimension of the array U2. LDU2 >= N. */ /* VT (output) REAL array, dimension (LDVT, M) */ /* The last M - K columns of VT' contain the deflated */ /* right singular vectors. */ /* LDVT (input) INTEGER */ /* The leading dimension of the array VT. LDVT >= N. */ /* VT2 (input/output) REAL array, dimension (LDVT2, N) */ /* The first K columns of VT2' contain the non-deflated */ /* right singular vectors for the split problem. */ /* LDVT2 (input) INTEGER */ /* The leading dimension of the array VT2. LDVT2 >= N. */ /* IDXC (input) INTEGER array, dimension (N) */ /* The permutation used to arrange the columns of U (and rows of */ /* VT) into three groups: the first group contains non-zero */ /* entries only at and above (or before) NL +1; the second */ /* contains non-zero entries only at and below (or after) NL+2; */ /* and the third is dense. The first column of U and the row of */ /* VT are treated separately, however. */ /* The rows of the singular vectors found by SLASD4 */ /* must be likewise permuted before the matrix multiplies can */ /* take place. */ /* CTOT (input) INTEGER array, dimension (4) */ /* A count of the total number of the various types of columns */ /* in U (or rows in VT), as described in IDXC. The fourth column */ /* type is any column which has been deflated. */ /* Z (input/output) REAL array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating row vector. */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an singular value did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Ming Gu and Huan Ren, Computer Science Division, University of */ /* California at Berkeley, USA */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. 
*/ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dsigma; u_dim1 = *ldu; u_offset = 1 + u_dim1; u -= u_offset; u2_dim1 = *ldu2; u2_offset = 1 + u2_dim1; u2 -= u2_offset; vt_dim1 = *ldvt; vt_offset = 1 + vt_dim1; vt -= vt_offset; vt2_dim1 = *ldvt2; vt2_offset = 1 + vt2_dim1; vt2 -= vt2_offset; --idxc; --ctot; --z__; /* Function Body */ *info = 0; if (*nl < 1) { *info = -1; } else if (*nr < 1) { *info = -2; } else if (*sqre != 1 && *sqre != 0) { *info = -3; } n = *nl + *nr + 1; m = n + *sqre; nlp1 = *nl + 1; nlp2 = *nl + 2; if (*k < 1 || *k > n) { *info = -4; } else if (*ldq < *k) { *info = -7; } else if (*ldu < n) { *info = -10; } else if (*ldu2 < n) { *info = -12; } else if (*ldvt < m) { *info = -14; } else if (*ldvt2 < m) { *info = -16; } if (*info != 0) { i__1 = -(*info); xerbla_("SLASD3", &i__1); return 0; } /* Quick return if possible */ if (*k == 1) { d__[1] = dabs(z__[1]); scopy_(&m, &vt2[vt2_dim1 + 1], ldvt2, &vt[vt_dim1 + 1], ldvt); if (z__[1] > 0.f) { scopy_(&n, &u2[u2_dim1 + 1], &c__1, &u[u_dim1 + 1], &c__1); } else { i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { u[i__ + u_dim1] = -u2[i__ + u2_dim1]; /* L10: */ } } return 0; } /* Modify values DSIGMA(i) to make sure all DSIGMA(i)-DSIGMA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DSIGMA(I) by 2*DSIGMA(I)-DSIGMA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DSIGMA(I) if it is 1; this makes the subsequent */ /* subtractions DSIGMA(I)-DSIGMA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DSIGMA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DSIGMA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DSIGMA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dsigma[i__] = slamc3_(&dsigma[i__], &dsigma[i__]) - dsigma[i__]; /* L20: */ } /* Keep a copy of Z. */ scopy_(k, &z__[1], &c__1, &q[q_offset], &c__1); /* Normalize Z. */ rho = snrm2_(k, &z__[1], &c__1); slascl_("G", &c__0, &c__0, &rho, &c_b13, k, &c__1, &z__[1], k, info); rho *= rho; /* Find the new singular values. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { slasd4_(k, &j, &dsigma[1], &z__[1], &u[j * u_dim1 + 1], &rho, &d__[j], &vt[j * vt_dim1 + 1], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { return 0; } /* L30: */ } /* Compute updated Z. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { z__[i__] = u[i__ + *k * u_dim1] * vt[i__ + *k * vt_dim1]; i__2 = i__ - 1; for (j = 1; j <= i__2; ++j) { z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ i__] - dsigma[j]) / (dsigma[i__] + dsigma[j]); /* L40: */ } i__2 = *k - 1; for (j = i__; j <= i__2; ++j) { z__[i__] *= u[i__ + j * u_dim1] * vt[i__ + j * vt_dim1] / (dsigma[ i__] - dsigma[j + 1]) / (dsigma[i__] + dsigma[j + 1]); /* L50: */ } r__2 = sqrt((r__1 = z__[i__], dabs(r__1))); z__[i__] = r_sign(&r__2, &q[i__ + q_dim1]); /* L60: */ } /* Compute left singular vectors of the modified diagonal matrix, */ /* and store related information for the right singular vectors. 
*/ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { vt[i__ * vt_dim1 + 1] = z__[1] / u[i__ * u_dim1 + 1] / vt[i__ * vt_dim1 + 1]; u[i__ * u_dim1 + 1] = -1.f; i__2 = *k; for (j = 2; j <= i__2; ++j) { vt[j + i__ * vt_dim1] = z__[j] / u[j + i__ * u_dim1] / vt[j + i__ * vt_dim1]; u[j + i__ * u_dim1] = dsigma[j] * vt[j + i__ * vt_dim1]; /* L70: */ } temp = snrm2_(k, &u[i__ * u_dim1 + 1], &c__1); q[i__ * q_dim1 + 1] = u[i__ * u_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { jc = idxc[j]; q[j + i__ * q_dim1] = u[jc + i__ * u_dim1] / temp; /* L80: */ } /* L90: */ } /* Update the left singular vector matrix. */ if (*k == 2) { sgemm_("N", "N", &n, k, k, &c_b13, &u2[u2_offset], ldu2, &q[q_offset], ldq, &c_b26, &u[u_offset], ldu); goto L100; } if (ctot[1] > 0) { sgemm_("N", "N", nl, k, &ctot[1], &c_b13, &u2[(u2_dim1 << 1) + 1], ldu2, &q[q_dim1 + 2], ldq, &c_b26, &u[u_dim1 + 1], ldu); if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1] , ldu2, &q[ktemp + q_dim1], ldq, &c_b13, &u[u_dim1 + 1], ldu); } } else if (ctot[3] > 0) { ktemp = ctot[1] + 2 + ctot[2]; sgemm_("N", "N", nl, k, &ctot[3], &c_b13, &u2[ktemp * u2_dim1 + 1], ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[u_dim1 + 1], ldu); } else { slacpy_("F", nl, k, &u2[u2_offset], ldu2, &u[u_offset], ldu); } scopy_(k, &q[q_dim1 + 1], ldq, &u[nlp1 + u_dim1], ldu); ktemp = ctot[1] + 2; ctemp = ctot[2] + ctot[3]; sgemm_("N", "N", nr, k, &ctemp, &c_b13, &u2[nlp2 + ktemp * u2_dim1], ldu2, &q[ktemp + q_dim1], ldq, &c_b26, &u[nlp2 + u_dim1], ldu); /* Generate the right singular vectors. */ L100: i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { temp = snrm2_(k, &vt[i__ * vt_dim1 + 1], &c__1); q[i__ + q_dim1] = vt[i__ * vt_dim1 + 1] / temp; i__2 = *k; for (j = 2; j <= i__2; ++j) { jc = idxc[j]; q[i__ + j * q_dim1] = vt[jc + i__ * vt_dim1] / temp; /* L110: */ } /* L120: */ } /* Update the right singular vector matrix. */ if (*k == 2) { sgemm_("N", "N", k, &m, k, &c_b13, &q[q_offset], ldq, &vt2[vt2_offset] , ldvt2, &c_b26, &vt[vt_offset], ldvt); return 0; } ktemp = ctot[1] + 1; sgemm_("N", "N", k, &nlp1, &ktemp, &c_b13, &q[q_dim1 + 1], ldq, &vt2[ vt2_dim1 + 1], ldvt2, &c_b26, &vt[vt_dim1 + 1], ldvt); ktemp = ctot[1] + 2 + ctot[2]; if (ktemp <= *ldvt2) { sgemm_("N", "N", k, &nlp1, &ctot[3], &c_b13, &q[ktemp * q_dim1 + 1], ldq, &vt2[ktemp + vt2_dim1], ldvt2, &c_b13, &vt[vt_dim1 + 1], ldvt); } ktemp = ctot[1] + 1; nrp1 = *nr + *sqre; if (ktemp > 1) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { q[i__ + ktemp * q_dim1] = q[i__ + q_dim1]; /* L130: */ } i__1 = m; for (i__ = nlp2; i__ <= i__1; ++i__) { vt2[ktemp + i__ * vt2_dim1] = vt2[i__ * vt2_dim1 + 1]; /* L140: */ } } ctemp = ctot[2] + 1 + ctot[3]; sgemm_("N", "N", k, &nrp1, &ctemp, &c_b13, &q[ktemp * q_dim1 + 1], ldq, & vt2[ktemp + nlp2 * vt2_dim1], ldvt2, &c_b26, &vt[nlp2 * vt_dim1 + 1], ldvt); return 0; /* End of SLASD3 */ } /* slasd3_ */
int slamc1_(int *beta, int *t, int *rnd, int *ieee1) { /* Initialized data */ static int first = TRUE_; /* System generated locals */ float r__1, r__2; /* Local variables */ static int lrnd; static float a, b, c, f; static int lbeta; static float savec; static int lieee1; static float t1, t2; extern double slamc3_(float *, float *); static int lt; static float one, qtr; if (first) { first = FALSE_; one = 1.f; /* LBETA, LIEEE1, LT and LRND are the local values of BE TA, IEEE1, T and RND. Throughout this routine we use the function SLAMC3 to ens ure that relevant values are stored and not held in registers, or are not affected by optimizers. Compute a = 2.0**m with the smallest positive integer m s uch that fl( a + 1.0 ) = a. */ a = 1.f; c = 1.f; /* + WHILE( C.EQ.ONE )LOOP */ L10: if (c == one) { a *= 2; c = slamc3_(&a, &one); r__1 = -(double)a; c = slamc3_(&c, &r__1); goto L10; } /* + END WHILE Now compute b = 2.0**m with the smallest positive integer m such that fl( a + b ) .gt. a. */ b = 1.f; c = slamc3_(&a, &b); /* + WHILE( C.EQ.A )LOOP */ L20: if (c == a) { b *= 2; c = slamc3_(&a, &b); goto L20; } /* + END WHILE Now compute the base. a and c are neighbouring floating po int numbers in the interval ( beta**t, beta**( t + 1 ) ) and so their difference is beta. Adding 0.25 to c is to ensure that it is truncated to beta and not ( beta - 1 ). */ qtr = one / 4; savec = c; r__1 = -(double)a; c = slamc3_(&c, &r__1); lbeta = c + qtr; /* Now determine whether rounding or chopping occurs, by addin g a bit less than beta/2 and a bit more than beta/2 to a. */ b = (float) lbeta; r__1 = b / 2; r__2 = -(double)b / 100; f = slamc3_(&r__1, &r__2); c = slamc3_(&f, &a); if (c == a) { lrnd = TRUE_; } else { lrnd = FALSE_; } r__1 = b / 2; r__2 = b / 100; f = slamc3_(&r__1, &r__2); c = slamc3_(&f, &a); if (lrnd && c == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to nearest' style. B/2 is half a unit in the last place of the two numbers A and SAVEC. Furthermore, A is even, i.e. has last bit zero, and SAVEC is odd. Thus adding B/2 to A should not cha nge A, but adding B/2 to SAVEC should change SAVEC. */ r__1 = b / 2; t1 = slamc3_(&r__1, &a); r__1 = b / 2; t2 = slamc3_(&r__1, &savec); lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the integer part of log to the base beta of a, however it is safer to determine t by powering. So we find t as the smallest positive integer for which fl( beta**t + 1.0 ) = 1.0. */ lt = 0; a = 1.f; c = 1.f; /* + WHILE( C.EQ.ONE )LOOP */ L30: if (c == one) { ++lt; a *= lbeta; c = slamc3_(&a, &one); r__1 = -(double)a; c = slamc3_(&c, &r__1); goto L30; } /* + END WHILE */ } *beta = lbeta; *t = lt; *rnd = lrnd; *ieee1 = lieee1; return 0; /* End of SLAMC1 */ } /* slamc1_ */
/*< SUBROUTINE SLAMC1( BETA, T, RND, IEEE1 ) >*/ /* Subroutine */ int slamc1_(integer *beta, integer *t, logical *rnd, logical *ieee1) { /* Initialized data */ static logical first = TRUE_; /* runtime-initialized constant */ /* System generated locals */ real r__1, r__2; /* Local variables */ real a, b, c__, f, t1, t2; static integer lt; /* runtime-initialized constant */ real one, qtr; static logical lrnd; /* runtime-initialized constant */ static integer lbeta; /* runtime-initialized constant */ real savec; static logical lieee1; /* runtime-initialized constant */ extern doublereal slamc3_(real *, real *); /* -- LAPACK auxiliary routine (version 1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* October 31, 1992 */ /* .. Scalar Arguments .. */ /*< LOGICAL IEEE1, RND >*/ /*< INTEGER BETA, T >*/ /* .. */ /* Purpose */ /* ======= */ /* SLAMC1 determines the machine parameters given by BETA, T, RND, and */ /* IEEE1. */ /* Arguments */ /* ========= */ /* BETA (output) INTEGER */ /* The base of the machine. */ /* T (output) INTEGER */ /* The number of ( BETA ) digits in the mantissa. */ /* RND (output) LOGICAL */ /* Specifies whether proper rounding ( RND = .TRUE. ) or */ /* chopping ( RND = .FALSE. ) occurs in addition. This may not */ /* be a reliable guide to the way in which the machine performs */ /* its arithmetic. */ /* IEEE1 (output) LOGICAL */ /* Specifies whether rounding appears to be done in the IEEE */ /* 'round to nearest' style. */ /* Further Details */ /* =============== */ /* The routine is based on the routine ENVRON by Malcolm and */ /* incorporates suggestions by Gentleman and Marovich. See */ /* Malcolm M. A. (1972) Algorithms to reveal properties of */ /* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ /* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ /* that reveal properties of floating point arithmetic units. */ /* Comms. of the ACM, 17, 276-277. */ /* ===================================================================== */ /* .. Local Scalars .. */ /*< LOGICAL FIRST, LIEEE1, LRND >*/ /*< INTEGER LBETA, LT >*/ /*< REAL A, B, C, F, ONE, QTR, SAVEC, T1, T2 >*/ /* .. */ /* .. External Functions .. */ /*< REAL SLAMC3 >*/ /*< EXTERNAL SLAMC3 >*/ /* .. */ /* .. Save statement .. */ /*< SAVE FIRST, LIEEE1, LBETA, LRND, LT >*/ /* .. */ /* .. Data statements .. */ /*< DATA FIRST / .TRUE. / >*/ /* .. */ /* .. Executable Statements .. */ /*< IF( FIRST ) THEN >*/ if (first) { /*< FIRST = .FALSE. >*/ first = FALSE_; /*< ONE = 1 >*/ one = (float)1.; /* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ /* IEEE1, T and RND. */ /* Throughout this routine we use the function SLAMC3 to ensure */ /* that relevant values are stored and not held in registers, or */ /* are not affected by optimizers. */ /* Compute a = 2.0**m with the smallest positive integer m such */ /* that */ /* fl( a + 1.0 ) = a. */ /*< A = 1 >*/ a = (float)1.; /*< C = 1 >*/ c__ = (float)1.; /* + WHILE( C.EQ.ONE )LOOP */ /*< 10 CONTINUE >*/ L10: /*< IF( C.EQ.ONE ) THEN >*/ if (c__ == one) { /*< A = 2*A >*/ a *= 2; /*< C = SLAMC3( A, ONE ) >*/ c__ = slamc3_(&a, &one); /*< C = SLAMC3( C, -A ) >*/ r__1 = -a; c__ = slamc3_(&c__, &r__1); /*< GO TO 10 >*/ goto L10; /*< END IF >*/ } /* + END WHILE */ /* Now compute b = 2.0**m with the smallest positive integer m */ /* such that */ /* fl( a + b ) .gt. a. 
*/ /*< B = 1 >*/ b = (float)1.; /*< C = SLAMC3( A, B ) >*/ c__ = slamc3_(&a, &b); /* + WHILE( C.EQ.A )LOOP */ /*< 20 CONTINUE >*/ L20: /*< IF( C.EQ.A ) THEN >*/ if (c__ == a) { /*< B = 2*B >*/ b *= 2; /*< C = SLAMC3( A, B ) >*/ c__ = slamc3_(&a, &b); /*< GO TO 20 >*/ goto L20; /*< END IF >*/ } /* + END WHILE */ /* Now compute the base. a and c are neighbouring floating point */ /* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ /* their difference is beta. Adding 0.25 to c is to ensure that it */ /* is truncated to beta and not ( beta - 1 ). */ /*< QTR = ONE / 4 >*/ qtr = one / 4; /*< SAVEC = C >*/ savec = c__; /*< C = SLAMC3( C, -A ) >*/ r__1 = -a; c__ = slamc3_(&c__, &r__1); /*< LBETA = C + QTR >*/ lbeta = c__ + qtr; /* Now determine whether rounding or chopping occurs, by adding a */ /* bit less than beta/2 and a bit more than beta/2 to a. */ /*< B = LBETA >*/ b = (real) lbeta; /*< F = SLAMC3( B / 2, -B / 100 ) >*/ r__1 = b / 2; r__2 = -b / 100; f = slamc3_(&r__1, &r__2); /*< C = SLAMC3( F, A ) >*/ c__ = slamc3_(&f, &a); /*< IF( C.EQ.A ) THEN >*/ if (c__ == a) { /*< LRND = .TRUE. >*/ lrnd = TRUE_; /*< ELSE >*/ } else { /*< LRND = .FALSE. >*/ lrnd = FALSE_; /*< END IF >*/ } /*< F = SLAMC3( B / 2, B / 100 ) >*/ r__1 = b / 2; r__2 = b / 100; f = slamc3_(&r__1, &r__2); /*< C = SLAMC3( F, A ) >*/ c__ = slamc3_(&f, &a); /*< >*/ if (lrnd && c__ == a) { lrnd = FALSE_; } /* Try and decide whether rounding is done in the IEEE 'round to */ /* nearest' style. B/2 is half a unit in the last place of the two */ /* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ /* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ /* A, but adding B/2 to SAVEC should change SAVEC. */ /*< T1 = SLAMC3( B / 2, A ) >*/ r__1 = b / 2; t1 = slamc3_(&r__1, &a); /*< T2 = SLAMC3( B / 2, SAVEC ) >*/ r__1 = b / 2; t2 = slamc3_(&r__1, &savec); /*< LIEEE1 = ( T1.EQ.A ) .AND. ( T2.GT.SAVEC ) .AND. LRND >*/ lieee1 = t1 == a && t2 > savec && lrnd; /* Now find the mantissa, t. It should be the integer part of */ /* log to the base beta of a, however it is safer to determine t */ /* by powering. So we find t as the smallest positive integer for */ /* which */ /* fl( beta**t + 1.0 ) = 1.0. */ /*< LT = 0 >*/ lt = 0; /*< A = 1 >*/ a = (float)1.; /*< C = 1 >*/ c__ = (float)1.; /* + WHILE( C.EQ.ONE )LOOP */ /*< 30 CONTINUE >*/ L30: /*< IF( C.EQ.ONE ) THEN >*/ if (c__ == one) { /*< LT = LT + 1 >*/ ++lt; /*< A = A*LBETA >*/ a *= lbeta; /*< C = SLAMC3( A, ONE ) >*/ c__ = slamc3_(&a, &one); /*< C = SLAMC3( C, -A ) >*/ r__1 = -a; c__ = slamc3_(&c__, &r__1); /*< GO TO 30 >*/ goto L30; /*< END IF >*/ } /* + END WHILE */ /*< END IF >*/ } /*< BETA = LBETA >*/ *beta = lbeta; /*< T = LT >*/ *t = lt; /*< RND = LRND >*/ *rnd = lrnd; /*< IEEE1 = LIEEE1 >*/ *ieee1 = lieee1; /*< RETURN >*/ return 0; /* End of SLAMC1 */ /*< END >*/ } /* slamc1_ */
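/* Illustrative sketch (an addition, not part of the original LAPACK/f2c sources):
   a minimal driver showing how SLAMC1 is typically queried.  It assumes the
   f2c.h typedefs (integer, logical, TRUE_/FALSE_) and <stdio.h> are available
   in this translation unit, and that the integer/logical variant of slamc1_
   defined immediately above is the one being linked.  On an IEEE-754 single
   precision machine one would normally expect BETA = 2 and T = 24, but the
   printed values are whatever Malcolm's probing loops actually detect. */
static void slamc1_demo(void)
{
    integer beta, t;
    logical rnd, ieee1;

    slamc1_(&beta, &t, &rnd, &ieee1);

    printf("slamc1: beta = %ld, t = %ld, rnd = %ld, ieee1 = %ld\n",
           (long) beta, (long) t, (long) rnd, (long) ieee1);
}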
/*! \brief <pre> Purpose ======= SLAMC2 determines the machine parameters specified in its argument list. Arguments ========= BETA (output) INT The base of the machine. T (output) INT The number of ( BETA ) digits in the mantissa. RND (output) INT Specifies whether proper rounding ( RND = .TRUE. ) or chopping ( RND = .FALSE. ) occurs in addition. This may not be a reliable guide to the way in which the machine performs its arithmetic. EPS (output) FLOAT The smallest positive number such that fl( 1.0 - EPS ) .LT. 1.0, where fl denotes the computed value. EMIN (output) INT The minimum exponent before (gradual) underflow occurs. RMIN (output) FLOAT The smallest normalized number for the machine, given by BASE**( EMIN - 1 ), where BASE is the floating point value of BETA. EMAX (output) INT The maximum exponent before overflow occurs. RMAX (output) FLOAT The largest positive number for the machine, given by BASE**EMAX * ( 1 - EPS ), where BASE is the floating point value of BETA. Further Details =============== The computation of EPS is based on a routine PARANOIA by W. Kahan of the University of California at Berkeley. ===================================================================== </pre> */ int slamc2_(int *beta, int *t, int *rnd, float * eps, int *emin, float *rmin, int *emax, float *rmax) { /* Table of constant values */ static int c__1 = 1; /* Initialized data */ static int first = TRUE_; static int iwarn = FALSE_; /* System generated locals */ int i__1; float r__1, r__2, r__3, r__4, r__5; /* Builtin functions */ double pow_ri(float *, int *); /* Local variables */ static int ieee; static float half; static int lrnd; static float leps, zero, a, b, c; static int i, lbeta; static float rbase; static int lemin, lemax, gnmin; static float small; static int gpmin; static float third, lrmin, lrmax, sixth; static int lieee1; extern /* Subroutine */ int slamc1_(int *, int *, int *, int *); extern double slamc3_(float *, float *); extern /* Subroutine */ int slamc4_(int *, float *, int *), slamc5_(int *, int *, int *, int *, int *, float *); static int lt, ngnmin, ngpmin; static float one, two; if (first) { first = FALSE_; zero = 0.f; one = 1.f; two = 2.f; /* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of BETA, T, RND, EPS, EMIN and RMIN. Throughout this routine we use the function SLAMC3 to ens ure that relevant values are stored and not held in registers, or are not affected by optimizers. SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ slamc1_(&lbeta, <, &lrnd, &lieee1); /* Start to find EPS. */ b = (float) lbeta; i__1 = -lt; a = pow_ri(&b, &i__1); leps = a; /* Try some tricks to see whether or not this is the correct E PS. */ b = two / 3; half = one / 2; r__1 = -(double)half; sixth = slamc3_(&b, &r__1); third = slamc3_(&sixth, &sixth); r__1 = -(double)half; b = slamc3_(&third, &r__1); b = slamc3_(&b, &sixth); b = dabs(b); if (b < leps) { b = leps; } leps = 1.f; /* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ L10: if (leps > b && b > zero) { leps = b; r__1 = half * leps; /* Computing 5th power */ r__3 = two, r__4 = r__3, r__3 *= r__3; /* Computing 2nd power */ r__5 = leps; r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); c = slamc3_(&r__1, &r__2); r__1 = -(double)c; c = slamc3_(&half, &r__1); b = slamc3_(&half, &c); r__1 = -(double)b; c = slamc3_(&half, &r__1); b = slamc3_(&half, &c); goto L10; } /* + END WHILE */ if (a < leps) { leps = a; } /* Computation of EPS complete. Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3 )). 
Keep dividing A by BETA until (gradual) underflow occurs. T his is detected when we cannot recover the previous A. */ rbase = one / lbeta; small = one; for (i = 1; i <= 3; ++i) { r__1 = small * rbase; small = slamc3_(&r__1, &zero); /* L20: */ } a = slamc3_(&one, &small); slamc4_(&ngpmin, &one, &lbeta); r__1 = -(double)one; slamc4_(&ngnmin, &r__1, &lbeta); slamc4_(&gpmin, &a, &lbeta); r__1 = -(double)a; slamc4_(&gnmin, &r__1, &lbeta); ieee = FALSE_; if (ngpmin == ngnmin && gpmin == gnmin) { if (ngpmin == gpmin) { lemin = ngpmin; /* ( Non twos-complement machines, no gradual under flow; e.g., VAX ) */ } else if (gpmin - ngpmin == 3) { lemin = ngpmin - 1 + lt; ieee = TRUE_; /* ( Non twos-complement machines, with gradual und erflow; e.g., IEEE standard followers ) */ } else { lemin = min(ngpmin,gpmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if (ngpmin == gpmin && ngnmin == gnmin) { if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { lemin = max(ngpmin,ngnmin); /* ( Twos-complement machines, no gradual underflow ; e.g., CYBER 205 ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin) { if (gpmin - min(ngpmin,ngnmin) == 3) { lemin = max(ngpmin,ngnmin) - 1 + lt; /* ( Twos-complement machines with gradual underflo w; no known machine ) */ } else { lemin = min(ngpmin,ngnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } } else { /* Computing MIN */ i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin); lemin = min(i__1,gnmin); /* ( A guess; no known machine ) */ iwarn = TRUE_; } /* ** Comment out this if block if EMIN is ok */ if (iwarn) { first = TRUE_; printf("\n\n WARNING. The value EMIN may be incorrect:- "); printf("EMIN = %8i\n",lemin); printf("If, after inspection, the value EMIN looks acceptable"); printf("please comment out \n the IF block as marked within the"); printf("code of routine SLAMC2, \n otherwise supply EMIN"); printf("explicitly.\n"); } /* ** Assume IEEE arithmetic if we found denormalised numbers abo ve, or if arithmetic seems to round in the IEEE style, determi ned in routine SLAMC1. A true IEEE machine should have both thi ngs true; however, faulty machines may have one or the other. */ ieee = ieee || lieee1; /* Compute RMIN by successive division by BETA. We could comp ute RMIN as BASE**( EMIN - 1 ), but some machines underflow dur ing this computation. */ lrmin = 1.f; i__1 = 1 - lemin; for (i = 1; i <= 1-lemin; ++i) { r__1 = lrmin * rbase; lrmin = slamc3_(&r__1, &zero); /* L30: */ } /* Finally, call SLAMC5 to compute EMAX and RMAX. */ slamc5_(&lbeta, <, &lemin, &ieee, &lemax, &lrmax); } *beta = lbeta; *t = lt; *rnd = lrnd; *eps = leps; *emin = lemin; *rmin = lrmin; *emax = lemax; *rmax = lrmax; return 0; /* End of SLAMC2 */ } /* slamc2_ */
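/* Illustrative sketch (an addition): querying the full set of machine
   parameters through SLAMC2 and printing them next to the C <float.h>
   constants for comparison.  It assumes <stdio.h> can be used, that <float.h>
   may be included here, and that the int/float variant of slamc2_ defined
   immediately above is the one called.  On an IEEE-754 single precision
   machine one typically sees beta = 2, t = 24, emin = -125, emax = 128,
   rmin = FLT_MIN and rmax = FLT_MAX, with EPS corresponding to
   FLT_EPSILON / 2 when proper rounding is detected; none of that is
   guaranteed by this sketch. */
#include <float.h>
static void slamc2_demo(void)
{
    int beta, t, rnd, emin, emax;
    float eps, rmin, rmax;

    slamc2_(&beta, &t, &rnd, &eps, &emin, &rmin, &emax, &rmax);

    printf("slamc2: beta=%d t=%d rnd=%d\n", beta, t, rnd);
    printf("        eps =%g   (FLT_EPSILON = %g)\n", eps,  FLT_EPSILON);
    printf("        rmin=%g   (FLT_MIN     = %g)\n", rmin, FLT_MIN);
    printf("        rmax=%g   (FLT_MAX     = %g)\n", rmax, FLT_MAX);
    printf("        emin=%d emax=%d\n", emin, emax);
}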
/*< SUBROUTINE SLAMC4( EMIN, START, BASE ) >*/ /* Subroutine */ int slamc4_(integer *emin, real *start, integer *base) { /* System generated locals */ integer i__1; real r__1; /* Local variables */ real a; integer i__; real b1, b2, c1, c2, d1, d2, one, zero, rbase; extern doublereal slamc3_(real *, real *); /* -- LAPACK auxiliary routine (version 1.1) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ /* Courant Institute, Argonne National Lab, and Rice University */ /* October 31, 1992 */ /* .. Scalar Arguments .. */ /*< INTEGER BASE, EMIN >*/ /*< REAL START >*/ /* .. */ /* Purpose */ /* ======= */ /* SLAMC4 is a service routine for SLAMC2. */ /* Arguments */ /* ========= */ /* EMIN (output) EMIN */ /* The minimum exponent before (gradual) underflow, computed by */ /* setting A = START and dividing by BASE until the previous A */ /* can not be recovered. */ /* START (input) REAL */ /* The starting point for determining EMIN. */ /* BASE (input) INTEGER */ /* The base of the machine. */ /* ===================================================================== */ /* .. Local Scalars .. */ /*< INTEGER I >*/ /*< REAL A, B1, B2, C1, C2, D1, D2, ONE, RBASE, ZERO >*/ /* .. */ /* .. External Functions .. */ /*< REAL SLAMC3 >*/ /*< EXTERNAL SLAMC3 >*/ /* .. */ /* .. Executable Statements .. */ /*< A = START >*/ a = *start; /*< ONE = 1 >*/ one = (float)1.; /*< RBASE = ONE / BASE >*/ rbase = one / *base; /*< ZERO = 0 >*/ zero = (float)0.; /*< EMIN = 1 >*/ *emin = 1; /*< B1 = SLAMC3( A*RBASE, ZERO ) >*/ r__1 = a * rbase; b1 = slamc3_(&r__1, &zero); /*< C1 = A >*/ c1 = a; /*< C2 = A >*/ c2 = a; /*< D1 = A >*/ d1 = a; /*< D2 = A >*/ d2 = a; /* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. */ /* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ /*< 10 CONTINUE >*/ L10: /*< >*/ if (c1 == a && c2 == a && d1 == a && d2 == a) { /*< EMIN = EMIN - 1 >*/ --(*emin); /*< A = B1 >*/ a = b1; /*< B1 = SLAMC3( A / BASE, ZERO ) >*/ r__1 = a / *base; b1 = slamc3_(&r__1, &zero); /*< C1 = SLAMC3( B1*BASE, ZERO ) >*/ r__1 = b1 * *base; c1 = slamc3_(&r__1, &zero); /*< D1 = ZERO >*/ d1 = zero; /*< DO 20 I = 1, BASE >*/ i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { /*< D1 = D1 + B1 >*/ d1 += b1; /*< 20 CONTINUE >*/ /* L20: */ } /*< B2 = SLAMC3( A*RBASE, ZERO ) >*/ r__1 = a * rbase; b2 = slamc3_(&r__1, &zero); /*< C2 = SLAMC3( B2 / RBASE, ZERO ) >*/ r__1 = b2 / rbase; c2 = slamc3_(&r__1, &zero); /*< D2 = ZERO >*/ d2 = zero; /*< DO 30 I = 1, BASE >*/ i__1 = *base; for (i__ = 1; i__ <= i__1; ++i__) { /*< D2 = D2 + B2 >*/ d2 += b2; /*< 30 CONTINUE >*/ /* L30: */ } /*< GO TO 10 >*/ goto L10; /*< END IF >*/ } /* + END WHILE */ /*< RETURN >*/ return 0; /* End of SLAMC4 */ /*< END >*/ } /* slamc4_ */
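/* Illustrative sketch (an addition): SLAMC4 is normally driven by SLAMC2,
   which probes with START = +1, -1, +(1 + BASE**(-3)) and -(1 + BASE**(-3))
   and then reconciles the four candidate exponents.  The fragment below
   reproduces only the first two probes.  It assumes the f2c integer/real
   typedefs, the integer/real variant of slamc4_ defined immediately above,
   and a binary machine (BASE = 2 is hard-coded here, whereas SLAMC2 obtains
   it from SLAMC1).  With gradual underflow the reported exponents lie below
   the normalized EMIN, because division by BASE can still be undone inside
   the denormal range. */
static void slamc4_demo(void)
{
    integer base = 2;            /* assumed binary machine; see note above */
    integer ngpmin, ngnmin;
    real one = 1.f, mone = -1.f;

    slamc4_(&ngpmin, &one, &base);    /* probe downward from +1 */
    slamc4_(&ngnmin, &mone, &base);   /* probe downward from -1 */

    printf("slamc4: ngpmin = %ld, ngnmin = %ld\n",
           (long) ngpmin, (long) ngnmin);
}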
int slamc5_(int *beta, int *p, int *emin, int *ieee, int *emax, float *rmax) { /* Table of constant values */ static float c_b5 = 0.f; /* System generated locals */ int i__1; float r__1; /* Local variables */ static int lexp; static float oldy; static int uexp, i; static float y, z; static int nbits; extern double slamc3_(float *, float *); static float recbas; static int exbits, expsum, try__; lexp = 1; exbits = 1; L10: try__ = lexp << 1; if (try__ <= -(*emin)) { lexp = try__; ++exbits; goto L10; } if (lexp == -(*emin)) { uexp = lexp; } else { uexp = try__; ++exbits; } /* Now -LEXP is less than or equal to EMIN, and -UEXP is greater than or equal to EMIN. EXBITS is the number of bits needed to store the exponent. */ if (uexp + *emin > -lexp - *emin) { expsum = lexp << 1; } else { expsum = uexp << 1; } /* EXPSUM is the exponent range, approximately equal to EMAX - EMIN + 1 . */ *emax = expsum + *emin - 1; nbits = exbits + 1 + *p; /* NBITS is the total number of bits needed to store a floating-point number. */ if (nbits % 2 == 1 && *beta == 2) { /* Either there are an odd number of bits used to store a floating-point number, which is unlikely, or some bits are not used in the representation of numbers, which is possible , (e.g. Cray machines) or the mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), which is perhaps the most likely. We have to assume the last alternative. If this is true, then we need to reduce EMAX by one because there must be some way of representing zero in an implicit-b it system. On machines like Cray, we are reducing EMAX by one unnecessarily. */ --(*emax); } if (*ieee) { /* Assume we are on an IEEE machine which reserves one exponent for infinity and NaN. */ --(*emax); } /* Now create RMAX, the largest machine number, which should be equal to (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0 - BETA**(-P), being careful that the result is less than 1.0 . */ recbas = 1.f / *beta; z = *beta - 1.f; y = 0.f; i__1 = *p; for (i = 1; i <= *p; ++i) { z *= recbas; if (y < 1.f) { oldy = y; } y = slamc3_(&y, &z); /* L20: */ } if (y >= 1.f) { y = oldy; } /* Now multiply by BETA**EMAX to get RMAX. */ i__1 = *emax; for (i = 1; i <= *emax; ++i) { r__1 = y * *beta; y = slamc3_(&r__1, &c_b5); /* L30: */ } *rmax = y; return 0; /* End of SLAMC5 */ } /* slamc5_ */
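/* Illustrative sketch (an addition): feeding SLAMC5 the parameters of an
   IEEE-754 single precision format (BETA = 2, P = 24 mantissa digits,
   EMIN = -125, IEEE = true) and checking the computed overflow threshold
   against FLT_MAX.  It assumes <stdio.h>/<float.h>, the f2c TRUE_ macro, and
   the int/float variant of slamc5_ defined immediately above.  On a
   conforming IEEE machine EMAX should come out as 128 and RMAX as
   (1 - 2**(-24)) * 2**128, i.e. FLT_MAX, but the comparison below is only a
   sanity check, not a guarantee. */
#include <float.h>
static void slamc5_demo(void)
{
    int beta = 2, p = 24, emin = -125, ieee = TRUE_;
    int emax;
    float rmax;

    slamc5_(&beta, &p, &emin, &ieee, &emax, &rmax);

    printf("slamc5: emax=%d rmax=%g (FLT_MAX=%g) match=%s\n",
           emax, rmax, FLT_MAX, rmax == FLT_MAX ? "yes" : "no");
}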
/* Subroutine */ int slaed3_(integer *k, integer *n, integer *n1, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *q2, integer * indx, integer *ctot, real *w, real *s, integer *info) { /* System generated locals */ integer q_dim1, q_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ integer i__, j, n2, n12, ii, n23, iq2; real temp; extern doublereal snrm2_(integer *, real *, integer *); extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), scopy_(integer *, real *, integer *, real *, integer *), slaed4_(integer *, integer *, real *, real *, real *, real *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int xerbla_(char *, integer *), slacpy_( char *, integer *, integer *, real *, integer *, real *, integer * ), slaset_(char *, integer *, integer *, real *, real *, real *, integer *); /* -- LAPACK routine (version 3.2) -- */ /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */ /* November 2006 */ /* .. Scalar Arguments .. */ /* .. */ /* .. Array Arguments .. */ /* .. */ /* Purpose */ /* ======= */ /* SLAED3 finds the roots of the secular equation, as defined by the */ /* values in D, W, and RHO, between 1 and K. It makes the */ /* appropriate calls to SLAED4 and then updates the eigenvectors by */ /* multiplying the matrix of eigenvectors of the pair of eigensystems */ /* being combined by the matrix of eigenvectors of the K-by-K system */ /* which is solved here. */ /* This code makes very mild assumptions about floating point */ /* arithmetic. It will work on machines with a guard digit in */ /* add/subtract, or on those binary machines without guard digits */ /* which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. */ /* It could conceivably fail on hexadecimal or decimal machines */ /* without guard digits, but we know of none. */ /* Arguments */ /* ========= */ /* K (input) INTEGER */ /* The number of terms in the rational function to be solved by */ /* SLAED4. K >= 0. */ /* N (input) INTEGER */ /* The number of rows and columns in the Q matrix. */ /* N >= K (deflation may result in N>K). */ /* N1 (input) INTEGER */ /* The location of the last eigenvalue in the leading submatrix. */ /* min(1,N) <= N1 <= N/2. */ /* D (output) REAL array, dimension (N) */ /* D(I) contains the updated eigenvalues for */ /* 1 <= I <= K. */ /* Q (output) REAL array, dimension (LDQ,N) */ /* Initially the first K columns are used as workspace. */ /* On output the columns 1 to K contain */ /* the updated eigenvectors. */ /* LDQ (input) INTEGER */ /* The leading dimension of the array Q. LDQ >= max(1,N). */ /* RHO (input) REAL */ /* The value of the parameter in the rank one update equation. */ /* RHO >= 0 required. */ /* DLAMDA (input/output) REAL array, dimension (K) */ /* The first K elements of this array contain the old roots */ /* of the deflated updating problem. These are the poles */ /* of the secular equation. May be changed on output by */ /* having lowest order bit set to zero on Cray X-MP, Cray Y-MP, */ /* Cray-2, or Cray C-90, as described above. */ /* Q2 (input) REAL array, dimension (LDQ2, N) */ /* The first K columns of this matrix contain the non-deflated */ /* eigenvectors for the split problem. 
*/ /* INDX (input) INTEGER array, dimension (N) */ /* The permutation used to arrange the columns of the deflated */ /* Q matrix into three groups (see SLAED2). */ /* The rows of the eigenvectors found by SLAED4 must be likewise */ /* permuted before the matrix multiply can take place. */ /* CTOT (input) INTEGER array, dimension (4) */ /* A count of the total number of the various types of columns */ /* in Q, as described in INDX. The fourth column type is any */ /* column which has been deflated. */ /* W (input/output) REAL array, dimension (K) */ /* The first K elements of this array contain the components */ /* of the deflation-adjusted updating vector. Destroyed on */ /* output. */ /* S (workspace) REAL array, dimension (N1 + 1)*K */ /* Will contain the eigenvectors of the repaired matrix which */ /* will be multiplied by the previously accumulated eigenvectors */ /* to update the system. */ /* LDS (input) INTEGER */ /* The leading dimension of S. LDS >= max(1,K). */ /* INFO (output) INTEGER */ /* = 0: successful exit. */ /* < 0: if INFO = -i, the i-th argument had an illegal value. */ /* > 0: if INFO = 1, an eigenvalue did not converge */ /* Further Details */ /* =============== */ /* Based on contributions by */ /* Jeff Rutter, Computer Science Division, University of California */ /* at Berkeley, USA */ /* Modified by Francoise Tisseur, University of Tennessee. */ /* ===================================================================== */ /* .. Parameters .. */ /* .. */ /* .. Local Scalars .. */ /* .. */ /* .. External Functions .. */ /* .. */ /* .. External Subroutines .. */ /* .. */ /* .. Intrinsic Functions .. */ /* .. */ /* .. Executable Statements .. */ /* Test the input parameters. */ /* Parameter adjustments */ --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1; q -= q_offset; --dlamda; --q2; --indx; --ctot; --w; --s; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*n < *k) { *info = -2; } else if (*ldq < max(1,*n)) { *info = -6; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED3", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can */ /* be computed with high relative accuracy (barring over/underflow). */ /* This is a problem on machines without a guard digit in */ /* add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). */ /* The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), */ /* which on any of these machines zeros out the bottommost */ /* bit of DLAMDA(I) if it is 1; this makes the subsequent */ /* subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation */ /* occurs. On binary machines with a guard digit (almost all */ /* machines) it does not change DLAMDA(I) at all. On hexadecimal */ /* and decimal machines with a guard digit, it slightly */ /* changes the bottommost bits of DLAMDA(I). It does not account */ /* for hexadecimal or decimal machines without guard digits */ /* (we know of none). We use a subroutine call to compute */ /* 2*DLAMBDA(I) to prevent optimizing compilers from eliminating */ /* this code. */ i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *k; for (j = 1; j <= i__1; ++j) { slaed4_(k, &j, &dlamda[1], &w[1], &q[j * q_dim1 + 1], rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. 
*/ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1) { goto L110; } if (*k == 2) { i__1 = *k; for (j = 1; j <= i__1; ++j) { w[1] = q[j * q_dim1 + 1]; w[2] = q[j * q_dim1 + 2]; ii = indx[1]; q[j * q_dim1 + 1] = w[ii]; ii = indx[2]; q[j * q_dim1 + 2] = w[ii]; /* L30: */ } goto L110; } /* Compute updated W. */ scopy_(k, &w[1], &c__1, &s[1], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L40: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q[i__ + j * q_dim1] / (dlamda[i__] - dlamda[j]); /* L50: */ } /* L60: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = sqrt(-w[i__]); w[i__] = r_sign(&r__1, &s[i__]); /* L70: */ } /* Compute eigenvectors of the modified rank-1 modification. */ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s[i__] = w[i__] / q[i__ + j * q_dim1]; /* L80: */ } temp = snrm2_(k, &s[1], &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { ii = indx[i__]; q[i__ + j * q_dim1] = s[ii] / temp; /* L90: */ } /* L100: */ } /* Compute the updated eigenvectors. */ L110: n2 = *n - *n1; n12 = ctot[1] + ctot[2]; n23 = ctot[2] + ctot[3]; slacpy_("A", &n23, k, &q[ctot[1] + 1 + q_dim1], ldq, &s[1], &n23); iq2 = *n1 * n12 + 1; if (n23 != 0) { sgemm_("N", "N", &n2, k, &n23, &c_b22, &q2[iq2], &n2, &s[1], &n23, & c_b23, &q[*n1 + 1 + q_dim1], ldq); } else { slaset_("A", &n2, k, &c_b23, &c_b23, &q[*n1 + 1 + q_dim1], ldq); } slacpy_("A", &n12, k, &q[q_offset], ldq, &s[1], &n12); if (n12 != 0) { sgemm_("N", "N", n1, k, &n12, &c_b22, &q2[1], n1, &s[1], &n12, &c_b23, &q[q_offset], ldq); } else { slaset_("A", n1, k, &c_b23, &c_b23, &q[q_dim1 + 1], ldq); } L120: return 0; /* End of SLAED3 */ } /* slaed3_ */
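/* Illustrative sketch (an addition): the DLAMDA(I) = SLAMC3( DLAMDA(I),
   DLAMDA(I) ) - DLAMDA(I) idiom used above computes 2*x - x through a
   subroutine call so that optimizing compilers cannot fold it away.  On
   machines with a guard digit it returns x unchanged; on Cray-style machines
   without one it clears the bottommost bit, which makes the later
   DLAMDA(I) - DLAMDA(J) subtractions safe under cancellation.  The helper
   below (a hypothetical name, not part of LAPACK) applies the same idiom to
   a single value, assuming the f2c real/doublereal typedefs. */
static real slaed3_guard_digit_fixup(real x)
{
    extern doublereal slamc3_(real *, real *);

    /* 2*x is formed via SLAMC3 so the expression survives optimization,  */
    /* then x is subtracted back out; on guard-digit machines the result  */
    /* is exactly x.                                                      */
    return (real) (slamc3_(&x, &x) - x);
}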
/* Subroutine */ int slaed9_(integer *k, integer *kstart, integer *kstop, integer *n, real *d__, real *q, integer *ldq, real *rho, real *dlamda, real *w, real *s, integer *lds, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Oak Ridge National Lab, Argonne National Lab, Courant Institute, NAG Ltd., and Rice University September 30, 1994 Purpose ======= SLAED9 finds the roots of the secular equation, as defined by the values in D, Z, and RHO, between KSTART and KSTOP. It makes the appropriate calls to SLAED4 and then stores the new matrix of eigenvectors for use in calculating the next level of Z vectors. Arguments ========= K (input) INTEGER The number of terms in the rational function to be solved by SLAED4. K >= 0. KSTART (input) INTEGER KSTOP (input) INTEGER The updated eigenvalues Lambda(I), KSTART <= I <= KSTOP are to be computed. 1 <= KSTART <= KSTOP <= K. N (input) INTEGER The number of rows and columns in the Q matrix. N >= K (deflation may result in N > K). D (output) REAL array, dimension (N) D(I) contains the updated eigenvalues for KSTART <= I <= KSTOP. Q (workspace) REAL array, dimension (LDQ,N) LDQ (input) INTEGER The leading dimension of the array Q. LDQ >= max( 1, N ). RHO (input) REAL The value of the parameter in the rank one update equation. RHO >= 0 required. DLAMDA (input) REAL array, dimension (K) The first K elements of this array contain the old roots of the deflated updating problem. These are the poles of the secular equation. W (input) REAL array, dimension (K) The first K elements of this array contain the components of the deflation-adjusted updating vector. S (output) REAL array, dimension (LDS, K) Will contain the eigenvectors of the repaired matrix which will be stored for subsequent Z vector calculation and multiplied by the previously accumulated eigenvectors to update the system. LDS (input) INTEGER The leading dimension of S. LDS >= max( 1, K ). INFO (output) INTEGER = 0: successful exit. < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = 1, an eigenvalue did not converge Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA ===================================================================== Test the input parameters.
Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; /* System generated locals */ integer q_dim1, q_offset, s_dim1, s_offset, i__1, i__2; real r__1; /* Builtin functions */ double sqrt(doublereal), r_sign(real *, real *); /* Local variables */ static real temp; extern doublereal snrm2_(integer *, real *, integer *); static integer i__, j; extern /* Subroutine */ int scopy_(integer *, real *, integer *, real *, integer *), slaed4_(integer *, integer *, real *, real *, real *, real *, real *, integer *); extern doublereal slamc3_(real *, real *); extern /* Subroutine */ int xerbla_(char *, integer *); #define q_ref(a_1,a_2) q[(a_2)*q_dim1 + a_1] #define s_ref(a_1,a_2) s[(a_2)*s_dim1 + a_1] --d__; q_dim1 = *ldq; q_offset = 1 + q_dim1 * 1; q -= q_offset; --dlamda; --w; s_dim1 = *lds; s_offset = 1 + s_dim1 * 1; s -= s_offset; /* Function Body */ *info = 0; if (*k < 0) { *info = -1; } else if (*kstart < 1 || *kstart > max(1,*k)) { *info = -2; } else if (max(1,*kstop) < *kstart || *kstop > max(1,*k)) { *info = -3; } else if (*n < *k) { *info = -4; } else if (*ldq < max(1,*k)) { *info = -7; } else if (*lds < max(1,*k)) { *info = -12; } if (*info != 0) { i__1 = -(*info); xerbla_("SLAED9", &i__1); return 0; } /* Quick return if possible */ if (*k == 0) { return 0; } /* Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can be computed with high relative accuracy (barring over/underflow). This is a problem on machines without a guard digit in add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2). The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I), which on any of these machines zeros out the bottommost bit of DLAMDA(I) if it is 1; this makes the subsequent subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation occurs. On binary machines with a guard digit (almost all machines) it does not change DLAMDA(I) at all. On hexadecimal and decimal machines with a guard digit, it slightly changes the bottommost bits of DLAMDA(I). It does not account for hexadecimal or decimal machines without guard digits (we know of none). We use a subroutine call to compute 2*DLAMBDA(I) to prevent optimizing compilers from eliminating this code. */ i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { dlamda[i__] = slamc3_(&dlamda[i__], &dlamda[i__]) - dlamda[i__]; /* L10: */ } i__1 = *kstop; for (j = *kstart; j <= i__1; ++j) { slaed4_(k, &j, &dlamda[1], &w[1], &q_ref(1, j), rho, &d__[j], info); /* If the zero finder fails, the computation is terminated. */ if (*info != 0) { goto L120; } /* L20: */ } if (*k == 1 || *k == 2) { i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { i__2 = *k; for (j = 1; j <= i__2; ++j) { s_ref(j, i__) = q_ref(j, i__); /* L30: */ } /* L40: */ } goto L120; } /* Compute updated W. */ scopy_(k, &w[1], &c__1, &s[s_offset], &c__1); /* Initialize W(I) = Q(I,I) */ i__1 = *ldq + 1; scopy_(k, &q[q_offset], &i__1, &w[1], &c__1); i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = j - 1; for (i__ = 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L50: */ } i__2 = *k; for (i__ = j + 1; i__ <= i__2; ++i__) { w[i__] *= q_ref(i__, j) / (dlamda[i__] - dlamda[j]); /* L60: */ } /* L70: */ } i__1 = *k; for (i__ = 1; i__ <= i__1; ++i__) { r__1 = sqrt(-w[i__]); w[i__] = r_sign(&r__1, &s_ref(i__, 1)); /* L80: */ } /* Compute eigenvectors of the modified rank-1 modification. 
*/ i__1 = *k; for (j = 1; j <= i__1; ++j) { i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { q_ref(i__, j) = w[i__] / q_ref(i__, j); /* L90: */ } temp = snrm2_(k, &q_ref(1, j), &c__1); i__2 = *k; for (i__ = 1; i__ <= i__2; ++i__) { s_ref(i__, j) = q_ref(i__, j) / temp; /* L100: */ } /* L110: */ } L120: return 0; /* End of SLAED9 */ } /* slaed9_ */
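/* Illustrative sketch (an addition): both SLAED3 and SLAED9 finish each
   column of the repaired eigenvector matrix by dividing it by its Euclidean
   norm, obtained from the Level 1 BLAS routine SNRM2.  The helper below (a
   hypothetical name, not part of LAPACK) performs that normalization step on
   a plain unit-stride array, assuming the f2c integer/real/doublereal
   typedefs and the f2c-style snrm2_(n, x, incx) referenced in the routines
   above. */
static void slaed_normalize_column(integer n, real *x)
{
    extern doublereal snrm2_(integer *, real *, integer *);

    static integer c_inc1 = 1;          /* unit stride, passed by reference */
    integer i__;
    real nrm = (real) snrm2_(&n, x, &c_inc1);

    if (nrm > 0.f) {                    /* leave an all-zero column untouched */
        for (i__ = 0; i__ < n; ++i__) {
            x[i__] /= nrm;
        }
    }
}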