static int run_single_test(const char *test, const uint8_t *block1, ptrdiff_t stride1, const uint8_t *block2, ptrdiff_t stride2, int align, int n) { int out, ref; av_pixelutils_sad_fn f_ref = sad_c[n - 1]; av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(n, n, align, NULL); switch (align) { case 0: block1++; block2++; break; case 1: block2++; break; case 2: break; } out = f_out(block1, stride1, block2, stride2); ref = f_ref(block1, stride1, block2, stride2); printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n", out == ref ? "OK" : "FAIL", align ? 'A' : 'U', align == 2 ? 'A' : 'U', test, 1<<n, 1<<n, out, ref); return out != ref; }
/* Subroutine */ int ctgsyl_(char *trans, integer *ijob, integer *m, integer * n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, complex *f, integer *ldf, real *scale, real *dif, complex *work, integer *lwork, integer *iwork, integer *info) { /* -- LAPACK routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= CTGSYL solves the generalized Sylvester equation: A * R - L * B = scale * C (1) D * R - L * E = scale * F where R and L are unknown m-by-n matrices, (A, D), (B, E) and (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n, respectively, with complex entries. A, B, D and E are upper triangular (i.e., (A,D) and (B,E) in generalized Schur form). The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1 is an output scaling factor chosen to avoid overflow. In matrix notation (1) is equivalent to solve Zx = scale*b, where Z is defined as Z = [ kron(In, A) -kron(B', Im) ] (2) [ kron(In, D) -kron(E', Im) ], Here Ix is the identity matrix of size x and X' is the conjugate transpose of X. Kron(X, Y) is the Kronecker product between the matrices X and Y. If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b is solved for, which is equivalent to solve for R and L in A' * R + D' * L = scale * C (3) R * B' + L * E' = scale * -F This case (TRANS = 'C') is used to compute an one-norm-based estimate of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D) and (B,E), using CLACON. If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the reciprocal of the smallest singular value of Z. This is a level-3 BLAS algorithm. Arguments ========= TRANS (input) CHARACTER*1 = 'N': solve the generalized sylvester equation (1). = 'C': solve the "conjugate transposed" system (3). IJOB (input) INTEGER Specifies what kind of functionality to be performed. =0: solve (1) only. =1: The functionality of 0 and 3. =2: The functionality of 0 and 4. =3: Only an estimate of Dif[(A,D), (B,E)] is computed. (look ahead strategy is used). =4: Only an estimate of Dif[(A,D), (B,E)] is computed. (CGECON on sub-systems is used). Not referenced if TRANS = 'C'. M (input) INTEGER The order of the matrices A and D, and the row dimension of the matrices C, F, R and L. N (input) INTEGER The order of the matrices B and E, and the column dimension of the matrices C, F, R and L. A (input) COMPLEX array, dimension (LDA, M) The upper triangular matrix A. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1, M). B (input) COMPLEX array, dimension (LDB, N) The upper triangular matrix B. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1, N). C (input/output) COMPLEX array, dimension (LDC, N) On entry, C contains the right-hand-side of the first matrix equation in (1) or (3). On exit, if IJOB = 0, 1 or 2, C has been overwritten by the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R, the solution achieved during the computation of the Dif-estimate. LDC (input) INTEGER The leading dimension of the array C. LDC >= max(1, M). D (input) COMPLEX array, dimension (LDD, M) The upper triangular matrix D. LDD (input) INTEGER The leading dimension of the array D. LDD >= max(1, M). E (input) COMPLEX array, dimension (LDE, N) The upper triangular matrix E. LDE (input) INTEGER The leading dimension of the array E. LDE >= max(1, N). F (input/output) COMPLEX array, dimension (LDF, N) On entry, F contains the right-hand-side of the second matrix equation in (1) or (3). On exit, if IJOB = 0, 1 or 2, F has been overwritten by the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L, the solution achieved during the computation of the Dif-estimate. LDF (input) INTEGER The leading dimension of the array F. LDF >= max(1, M). DIF (output) REAL On exit DIF is the reciprocal of a lower bound of the reciprocal of the Dif-function, i.e. DIF is an upper bound of Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2). IF IJOB = 0 or TRANS = 'C', DIF is not referenced. SCALE (output) REAL On exit SCALE is the scaling factor in (1) or (3). If 0 < SCALE < 1, C and F hold the solutions R and L, resp., to a slightly perturbed system but the input matrices A, B, D and E have not been changed. If SCALE = 0, R and L will hold the solutions to the homogenious system with C = F = 0. WORK (workspace/output) COMPLEX array, dimension (LWORK) IF IJOB = 0, WORK is not referenced. Otherwise, LWORK (input) INTEGER The dimension of the array WORK. LWORK > = 1. If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. IWORK (workspace) INTEGER array, dimension (M+N+2) If IJOB = 0, IWORK is not referenced. INFO (output) INTEGER =0: successful exit <0: If INFO = -i, the i-th argument had an illegal value. >0: (A, D) and (B, E) have common or very close eigenvalues. Further Details =============== Based on contributions by Bo Kagstrom and Peter Poromaa, Department of Computing Science, Umea University, S-901 87 Umea, Sweden. [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software for Solving the Generalized Sylvester Equation and Estimating the Separation between Regular Matrix Pairs, Report UMINF - 93.23, Department of Computing Science, Umea University, S-901 87 Umea, Sweden, December 1993, Revised April 1994, Also as LAPACK Working Note 75. To appear in ACM Trans. on Math. Software, Vol 22, No 1, 1996. [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal. Appl., 15(4):1045-1060, 1994. [3] B. Kagstrom and L. Westin, Generalized Schur Methods with Condition Estimators for Solving the Generalized Sylvester Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7, July 1989, pp 745-751. ===================================================================== Decode and test input parameters Parameter adjustments */ /* Table of constant values */ static integer c__2 = 2; static integer c_n1 = -1; static integer c__5 = 5; static integer c__0 = 0; static integer c__1 = 1; static complex c_b16 = {0.f,0.f}; static complex c_b53 = {-1.f,0.f}; static complex c_b54 = {1.f,0.f}; /* System generated locals */ integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, i__4; complex q__1; /* Builtin functions */ double sqrt(doublereal); /* Local variables */ static real dsum; static integer i__, j, k, p, q; extern /* Subroutine */ int cscal_(integer *, complex *, complex *, integer *), cgemm_(char *, char *, integer *, integer *, integer * , complex *, complex *, integer *, complex *, integer *, complex * , complex *, integer *); extern logical lsame_(char *, char *); static integer ifunc, linfo; extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, complex *, integer *); static integer lwmin; static real scale2; extern /* Subroutine */ int ctgsy2_(char *, integer *, integer *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, complex *, integer *, real *, real *, real *, integer *); static integer ie, je, mb, nb; static real dscale; static integer is, js, pq; static real scaloc; extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex *, integer *, complex *, integer *), xerbla_(char *, integer *); extern integer ilaenv_(integer *, char *, char *, integer *, integer *, integer *, integer *, ftnlen, ftnlen); static integer iround; static logical notran; static integer isolve; static logical lquery; #define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1 #define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)] #define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1 #define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)] #define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1 #define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)] #define d___subscr(a_1,a_2) (a_2)*d_dim1 + a_1 #define d___ref(a_1,a_2) d__[d___subscr(a_1,a_2)] #define e_subscr(a_1,a_2) (a_2)*e_dim1 + a_1 #define e_ref(a_1,a_2) e[e_subscr(a_1,a_2)] #define f_subscr(a_1,a_2) (a_2)*f_dim1 + a_1 #define f_ref(a_1,a_2) f[f_subscr(a_1,a_2)] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; b_dim1 = *ldb; b_offset = 1 + b_dim1 * 1; b -= b_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1 * 1; c__ -= c_offset; d_dim1 = *ldd; d_offset = 1 + d_dim1 * 1; d__ -= d_offset; e_dim1 = *lde; e_offset = 1 + e_dim1 * 1; e -= e_offset; f_dim1 = *ldf; f_offset = 1 + f_dim1 * 1; f -= f_offset; --work; --iwork; /* Function Body */ *info = 0; notran = lsame_(trans, "N"); lquery = *lwork == -1; if ((*ijob == 1 || *ijob == 2) && notran) { /* Computing MAX */ i__1 = 1, i__2 = (*m << 1) * *n; lwmin = max(i__1,i__2); } else { lwmin = 1; } if (! notran && ! lsame_(trans, "C")) { *info = -1; } else if (*ijob < 0 || *ijob > 4) { *info = -2; } else if (*m <= 0) { *info = -3; } else if (*n <= 0) { *info = -4; } else if (*lda < max(1,*m)) { *info = -6; } else if (*ldb < max(1,*n)) { *info = -8; } else if (*ldc < max(1,*m)) { *info = -10; } else if (*ldd < max(1,*m)) { *info = -12; } else if (*lde < max(1,*n)) { *info = -14; } else if (*ldf < max(1,*m)) { *info = -16; } else if (*lwork < lwmin && ! lquery) { *info = -20; } if (*info == 0) { work[1].r = (real) lwmin, work[1].i = 0.f; } if (*info != 0) { i__1 = -(*info); xerbla_("CTGSYL", &i__1); return 0; } else if (lquery) { return 0; } /* Determine optimal block sizes MB and NB */ mb = ilaenv_(&c__2, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); nb = ilaenv_(&c__5, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, ( ftnlen)1); isolve = 1; ifunc = 0; if (*ijob >= 3 && notran) { ifunc = *ijob - 2; i__1 = *n; for (j = 1; j <= i__1; ++j) { ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1); ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1); /* L10: */ } } else if (*ijob >= 1 && notran) { isolve = 2; } if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) { /* Use unblocked Level 2 solver */ i__1 = isolve; for (iround = 1; iround <= i__1; ++iround) { *scale = 1.f; dscale = 0.f; dsum = 1.f; pq = *m * *n; ctgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb, &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset], lde, &f[f_offset], ldf, scale, &dsum, &dscale, info); if (dscale != 0.f) { if (*ijob == 1 || *ijob == 3) { *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt( dsum)); } else { *dif = sqrt((real) pq) / (dscale * sqrt(dsum)); } } if (isolve == 2 && iround == 1) { ifunc = *ijob; scale2 = *scale; clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); i__2 = *n; for (j = 1; j <= i__2; ++j) { ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1); ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1); /* L20: */ } } else if (isolve == 2 && iround == 2) { clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); *scale = scale2; } /* L30: */ } return 0; } /* Determine block structure of A */ p = 0; i__ = 1; L40: if (i__ > *m) { goto L50; } ++p; iwork[p] = i__; i__ += mb; if (i__ >= *m) { goto L50; } goto L40; L50: iwork[p + 1] = *m + 1; if (iwork[p] == iwork[p + 1]) { --p; } /* Determine block structure of B */ q = p + 1; j = 1; L60: if (j > *n) { goto L70; } ++q; iwork[q] = j; j += nb; if (j >= *n) { goto L70; } goto L60; L70: iwork[q + 1] = *n + 1; if (iwork[q] == iwork[q + 1]) { --q; } if (notran) { i__1 = isolve; for (iround = 1; iround <= i__1; ++iround) { /* Solve (I, J) - subsystem A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J) D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J) for I = P, P - 1, ..., 1; J = 1, 2, ..., Q */ pq = 0; *scale = 1.f; dscale = 0.f; dsum = 1.f; i__2 = q; for (j = p + 2; j <= i__2; ++j) { js = iwork[j]; je = iwork[j + 1] - 1; nb = je - js + 1; for (i__ = p; i__ >= 1; --i__) { is = iwork[i__]; ie = iwork[i__ + 1] - 1; mb = ie - is + 1; ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, & b_ref(js, js), ldb, &c___ref(is, js), ldc, & d___ref(is, is), ldd, &e_ref(js, js), lde, &f_ref( is, js), ldf, &scaloc, &dsum, &dscale, &linfo); if (linfo > 0) { *info = linfo; } pq += mb * nb; if (scaloc != 1.f) { i__3 = js - 1; for (k = 1; k <= i__3; ++k) { q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &c___ref(1, k), &c__1); q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &f_ref(1, k), &c__1); /* L80: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = is - 1; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &c___ref(1, k), &c__1); i__4 = is - 1; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &f_ref(1, k), &c__1); /* L90: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = *m - ie; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1); i__4 = *m - ie; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1); /* L100: */ } i__3 = *n; for (k = je + 1; k <= i__3; ++k) { q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &c___ref(1, k), &c__1); q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &f_ref(1, k), &c__1); /* L110: */ } *scale *= scaloc; } /* Substitute R(I,J) and L(I,J) into remaining equation. */ if (i__ > 1) { i__3 = is - 1; cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &a_ref(1, is), lda, &c___ref(is, js), ldc, &c_b54, & c___ref(1, js), ldc); i__3 = is - 1; cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &d___ref(1, is), ldd, &c___ref(is, js), ldc, &c_b54, & f_ref(1, js), ldf); } if (j < q) { i__3 = *n - je; cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, js), ldf, &b_ref(js, je + 1), ldb, &c_b54, & c___ref(is, je + 1), ldc); i__3 = *n - je; cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, js), ldf, &e_ref(js, je + 1), lde, &c_b54, & f_ref(is, je + 1), ldf); } /* L120: */ } /* L130: */ } if (dscale != 0.f) { if (*ijob == 1 || *ijob == 3) { *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt( dsum)); } else { *dif = sqrt((real) pq) / (dscale * sqrt(dsum)); } } if (isolve == 2 && iround == 1) { ifunc = *ijob; scale2 = *scale; clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m); clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m); i__2 = *n; for (j = 1; j <= i__2; ++j) { ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1); ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1); /* L140: */ } } else if (isolve == 2 && iround == 2) { clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc); clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf); *scale = scale2; } /* L150: */ } } else { /* Solve transposed (I, J)-subsystem A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J) R(I, J) * B(J, J) + L(I, J) * E(J, J) = -F(I, J) for I = 1,2,..., P; J = Q, Q-1,..., 1 */ *scale = 1.f; i__1 = p; for (i__ = 1; i__ <= i__1; ++i__) { is = iwork[i__]; ie = iwork[i__ + 1] - 1; mb = ie - is + 1; i__2 = p + 2; for (j = q; j >= i__2; --j) { js = iwork[j]; je = iwork[j + 1] - 1; nb = je - js + 1; ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, &b_ref( js, js), ldb, &c___ref(is, js), ldc, &d___ref(is, is), ldd, &e_ref(js, js), lde, &f_ref(is, js), ldf, & scaloc, &dsum, &dscale, &linfo); if (linfo > 0) { *info = linfo; } if (scaloc != 1.f) { i__3 = js - 1; for (k = 1; k <= i__3; ++k) { q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &c___ref(1, k), &c__1); q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &f_ref(1, k), &c__1); /* L160: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = is - 1; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &c___ref(1, k), &c__1); i__4 = is - 1; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &f_ref(1, k), &c__1); /* L170: */ } i__3 = je; for (k = js; k <= i__3; ++k) { i__4 = *m - ie; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1); i__4 = *m - ie; q__1.r = scaloc, q__1.i = 0.f; cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1); /* L180: */ } i__3 = *n; for (k = je + 1; k <= i__3; ++k) { q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &c___ref(1, k), &c__1); q__1.r = scaloc, q__1.i = 0.f; cscal_(m, &q__1, &f_ref(1, k), &c__1); /* L190: */ } *scale *= scaloc; } /* Substitute R(I,J) and L(I,J) into remaining equation. */ if (j > p + 2) { i__3 = js - 1; cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &c___ref(is, js) , ldc, &b_ref(1, js), ldb, &c_b54, &f_ref(is, 1), ldf); i__3 = js - 1; cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &f_ref(is, js), ldf, &e_ref(1, js), lde, &c_b54, &f_ref(is, 1), ldf); } if (i__ < p) { i__3 = *m - ie; cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &a_ref(is, ie + 1), lda, &c___ref(is, js), ldc, &c_b54, &c___ref( ie + 1, js), ldc); i__3 = *m - ie; cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &d___ref(is, ie + 1), ldd, &f_ref(is, js), ldf, &c_b54, &c___ref( ie + 1, js), ldc); } /* L200: */ } /* L210: */ } } work[1].r = (real) lwmin, work[1].i = 0.f; return 0; /* End of CTGSYL */ } /* ctgsyl_ */
/* Subroutine */ int slaqps_(integer *m, integer *n, integer *offset, integer *nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, real *vn1, real *vn2, real *auxv, real *f, integer *ldf) { /* -- LAPACK auxiliary routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University June 30, 1999 Purpose ======= SLAQPS computes a step of QR factorization with column pivoting of a real M-by-N matrix A by using Blas-3. It tries to factorize NB columns from A starting from the row OFFSET+1, and updates all of the matrix with Blas-3 xGEMM. In some cases, due to catastrophic cancellations, it cannot factorize NB columns. Hence, the actual number of factorized columns is returned in KB. Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized. Arguments ========= M (input) INTEGER The number of rows of the matrix A. M >= 0. N (input) INTEGER The number of columns of the matrix A. N >= 0 OFFSET (input) INTEGER The number of rows of A that have been factorized in previous steps. NB (input) INTEGER The number of columns to factorize. KB (output) INTEGER The number of columns actually factorized. A (input/output) REAL array, dimension (LDA,N) On entry, the M-by-N matrix A. On exit, block A(OFFSET+1:M,1:KB) is the triangular factor obtained and block A(1:OFFSET,1:N) has been accordingly pivoted, but no factorized. The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has been updated. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,M). JPVT (input/output) INTEGER array, dimension (N) JPVT(I) = K <==> Column K of the full matrix A has been permuted into position I in AP. TAU (output) REAL array, dimension (KB) The scalar factors of the elementary reflectors. VN1 (input/output) REAL array, dimension (N) The vector with the partial column norms. VN2 (input/output) REAL array, dimension (N) The vector with the exact column norms. AUXV (input/output) REAL array, dimension (NB) Auxiliar vector. F (input/output) REAL array, dimension (LDF,NB) Matrix F' = L*Y'*A. LDF (input) INTEGER The leading dimension of the array F. LDF >= max(1,N). Further Details =============== Based on contributions by G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain X. Sun, Computer Science Dept., Duke University, USA ===================================================================== Parameter adjustments */ /* Table of constant values */ static integer c__1 = 1; static real c_b7 = -1.f; static real c_b8 = 1.f; static real c_b15 = 0.f; /* System generated locals */ integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2; real r__1, r__2; /* Builtin functions */ double sqrt(doublereal); integer i_nint(real *); /* Local variables */ static real temp, temp2; extern doublereal snrm2_(integer *, real *, integer *); static integer j, k; extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *); static integer itemp; extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, real *, integer *, real *, integer *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *); static integer rk; extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, real *); static integer lsticc; extern integer isamax_(integer *, real *, integer *); static integer lastrk; static real akk; static integer pvt; #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1] #define f_ref(a_1,a_2) f[(a_2)*f_dim1 + a_1] a_dim1 = *lda; a_offset = 1 + a_dim1 * 1; a -= a_offset; --jpvt; --tau; --vn1; --vn2; --auxv; f_dim1 = *ldf; f_offset = 1 + f_dim1 * 1; f -= f_offset; /* Function Body Computing MIN */ i__1 = *m, i__2 = *n + *offset; lastrk = min(i__1,i__2); lsticc = 0; k = 0; /* Beginning of while loop. */ L10: if (k < *nb && lsticc == 0) { ++k; rk = *offset + k; /* Determine ith pivot column and swap if necessary */ i__1 = *n - k + 1; pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1); if (pvt != k) { sswap_(m, &a_ref(1, pvt), &c__1, &a_ref(1, k), &c__1); i__1 = k - 1; sswap_(&i__1, &f_ref(pvt, 1), ldf, &f_ref(k, 1), ldf); itemp = jpvt[pvt]; jpvt[pvt] = jpvt[k]; jpvt[k] = itemp; vn1[pvt] = vn1[k]; vn2[pvt] = vn2[k]; } /* Apply previous Householder reflectors to column K: A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */ if (k > 1) { i__1 = *m - rk + 1; i__2 = k - 1; sgemv_("No transpose", &i__1, &i__2, &c_b7, &a_ref(rk, 1), lda, & f_ref(k, 1), ldf, &c_b8, &a_ref(rk, k), &c__1) ; } /* Generate elementary reflector H(k). */ if (rk < *m) { i__1 = *m - rk + 1; slarfg_(&i__1, &a_ref(rk, k), &a_ref(rk + 1, k), &c__1, &tau[k]); } else { slarfg_(&c__1, &a_ref(rk, k), &a_ref(rk, k), &c__1, &tau[k]); } akk = a_ref(rk, k); a_ref(rk, k) = 1.f; /* Compute Kth column of F: Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */ if (k < *n) { i__1 = *m - rk + 1; i__2 = *n - k; sgemv_("Transpose", &i__1, &i__2, &tau[k], &a_ref(rk, k + 1), lda, &a_ref(rk, k), &c__1, &c_b15, &f_ref(k + 1, k), &c__1); } /* Padding F(1:K,K) with zeros. */ i__1 = k; for (j = 1; j <= i__1; ++j) { f_ref(j, k) = 0.f; /* L20: */ } /* Incremental updating of F: F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)' *A(RK:M,K). */ if (k > 1) { i__1 = *m - rk + 1; i__2 = k - 1; r__1 = -tau[k]; sgemv_("Transpose", &i__1, &i__2, &r__1, &a_ref(rk, 1), lda, & a_ref(rk, k), &c__1, &c_b15, &auxv[1], &c__1); i__1 = k - 1; sgemv_("No transpose", n, &i__1, &c_b8, &f_ref(1, 1), ldf, &auxv[ 1], &c__1, &c_b8, &f_ref(1, k), &c__1); } /* Update the current row of A: A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */ if (k < *n) { i__1 = *n - k; sgemv_("No transpose", &i__1, &k, &c_b7, &f_ref(k + 1, 1), ldf, & a_ref(rk, 1), lda, &c_b8, &a_ref(rk, k + 1), lda); } /* Update partial column norms. */ if (rk < lastrk) { i__1 = *n; for (j = k + 1; j <= i__1; ++j) { if (vn1[j] != 0.f) { temp = (r__1 = a_ref(rk, j), dabs(r__1)) / vn1[j]; /* Computing MAX */ r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp); temp = dmax(r__1,r__2); /* Computing 2nd power */ r__1 = vn1[j] / vn2[j]; temp2 = temp * .05f * (r__1 * r__1) + 1.f; if (temp2 == 1.f) { vn2[j] = (real) lsticc; lsticc = j; } else { vn1[j] *= sqrt(temp); } } /* L30: */ } } a_ref(rk, k) = akk; /* End of while loop. */ goto L10; } *kb = k; rk = *offset + *kb; /* Apply the block reflector to the rest of the matrix: A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) - A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'. Computing MIN */ i__1 = *n, i__2 = *m - *offset; if (*kb < min(i__1,i__2)) { i__1 = *m - rk; i__2 = *n - *kb; sgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b7, &a_ref( rk + 1, 1), lda, &f_ref(*kb + 1, 1), ldf, &c_b8, &a_ref(rk + 1, *kb + 1), lda); } /* Recomputation of difficult columns. */ L40: if (lsticc > 0) { itemp = i_nint(&vn2[lsticc]); i__1 = *m - rk; vn1[lsticc] = snrm2_(&i__1, &a_ref(rk + 1, lsticc), &c__1); vn2[lsticc] = vn1[lsticc]; lsticc = itemp; goto L40; } return 0; /* End of SLAQPS */ } /* slaqps_ */
/* Subroutine */ int dchkgk_(integer *nin, integer *nout) { /* Format strings */ static char fmt_9999[] = "(1x,\002.. test output of DGGBAK .. \002)"; static char fmt_9998[] = "(\002 value of largest test error " " =\002,d12.3)"; static char fmt_9997[] = "(\002 example number where DGGBAL info is not " "0 =\002,i4)"; static char fmt_9996[] = "(\002 example number where DGGBAK(L) info is n" "ot 0 =\002,i4)"; static char fmt_9995[] = "(\002 example number where DGGBAK(R) info is n" "ot 0 =\002,i4)"; static char fmt_9994[] = "(\002 example number having largest error " " =\002,i4)"; static char fmt_9993[] = "(\002 number of examples where info is not 0 " " =\002,i4)"; static char fmt_9992[] = "(\002 total number of examples tested " " =\002,i4)"; /* System generated locals */ integer i__1, i__2; doublereal d__1, d__2, d__3; /* Builtin functions */ integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), e_rsle(void), s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, char *, ftnlen); /* Local variables */ static integer info, lmax[4]; static doublereal rmax, vmax, work[2500] /* was [50][50] */, a[2500] /* was [50][50] */, b[2500] /* was [50][50] */, e[2500] /* was [50][50] */, f[2500] /* was [50][50] */; static integer i__, j, m, n; extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, doublereal *, doublereal *, integer *); static integer ninfo; static doublereal anorm, bnorm, af[2500] /* was [50][50] */, bf[2500] /* was [50][50] */; extern /* Subroutine */ int dggbak_(char *, char *, integer *, integer *, integer *, doublereal *, doublereal *, integer *, doublereal *, integer *, integer *), dggbal_(char *, integer *, doublereal *, integer *, doublereal *, integer *, integer *, integer *, doublereal *, doublereal *, doublereal *, integer *); extern doublereal dlamch_(char *), dlange_(char *, integer *, integer *, doublereal *, integer *, doublereal *); static doublereal vl[2500] /* was [50][50] */, lscale[50], vr[2500] /* was [50][50] */, rscale[50]; extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, doublereal *, integer *, doublereal *, integer *); static integer ihi, ilo; static doublereal eps, vlf[2500] /* was [50][50] */; static integer knt; static doublereal vrf[2500] /* was [50][50] */; /* Fortran I/O blocks */ static cilist io___6 = { 0, 0, 0, 0, 0 }; static cilist io___10 = { 0, 0, 0, 0, 0 }; static cilist io___13 = { 0, 0, 0, 0, 0 }; static cilist io___15 = { 0, 0, 0, 0, 0 }; static cilist io___17 = { 0, 0, 0, 0, 0 }; static cilist io___34 = { 0, 0, 0, fmt_9999, 0 }; static cilist io___35 = { 0, 0, 0, fmt_9998, 0 }; static cilist io___36 = { 0, 0, 0, fmt_9997, 0 }; static cilist io___37 = { 0, 0, 0, fmt_9996, 0 }; static cilist io___38 = { 0, 0, 0, fmt_9995, 0 }; static cilist io___39 = { 0, 0, 0, fmt_9994, 0 }; static cilist io___40 = { 0, 0, 0, fmt_9993, 0 }; static cilist io___41 = { 0, 0, 0, fmt_9992, 0 }; #define a_ref(a_1,a_2) a[(a_2)*50 + a_1 - 51] #define b_ref(a_1,a_2) b[(a_2)*50 + a_1 - 51] #define e_ref(a_1,a_2) e[(a_2)*50 + a_1 - 51] #define f_ref(a_1,a_2) f[(a_2)*50 + a_1 - 51] #define vl_ref(a_1,a_2) vl[(a_2)*50 + a_1 - 51] #define vr_ref(a_1,a_2) vr[(a_2)*50 + a_1 - 51] /* -- LAPACK test routine (version 3.0) -- Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., Courant Institute, Argonne National Lab, and Rice University September 30, 1994 Purpose ======= DCHKGK tests DGGBAK, a routine for backward balancing of a matrix pair (A, B). Arguments ========= NIN (input) INTEGER The logical unit number for input. NIN > 0. NOUT (input) INTEGER The logical unit number for output. NOUT > 0. ===================================================================== Initialization */ lmax[0] = 0; lmax[1] = 0; lmax[2] = 0; lmax[3] = 0; ninfo = 0; knt = 0; rmax = 0.; eps = dlamch_("Precision"); L10: io___6.ciunit = *nin; s_rsle(&io___6); do_lio(&c__3, &c__1, (char *)&n, (ftnlen)sizeof(integer)); do_lio(&c__3, &c__1, (char *)&m, (ftnlen)sizeof(integer)); e_rsle(); if (n == 0) { goto L100; } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___10.ciunit = *nin; s_rsle(&io___10); i__2 = n; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&a_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L20: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___13.ciunit = *nin; s_rsle(&io___13); i__2 = n; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&b_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L30: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___15.ciunit = *nin; s_rsle(&io___15); i__2 = m; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&vl_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L40: */ } i__1 = n; for (i__ = 1; i__ <= i__1; ++i__) { io___17.ciunit = *nin; s_rsle(&io___17); i__2 = m; for (j = 1; j <= i__2; ++j) { do_lio(&c__5, &c__1, (char *)&vr_ref(i__, j), (ftnlen)sizeof( doublereal)); } e_rsle(); /* L50: */ } ++knt; anorm = dlange_("M", &n, &n, a, &c__50, work); bnorm = dlange_("M", &n, &n, b, &c__50, work); dlacpy_("FULL", &n, &n, a, &c__50, af, &c__50); dlacpy_("FULL", &n, &n, b, &c__50, bf, &c__50); dggbal_("B", &n, a, &c__50, b, &c__50, &ilo, &ihi, lscale, rscale, work, & info); if (info != 0) { ++ninfo; lmax[0] = knt; } dlacpy_("FULL", &n, &m, vl, &c__50, vlf, &c__50); dlacpy_("FULL", &n, &m, vr, &c__50, vrf, &c__50); dggbak_("B", "L", &n, &ilo, &ihi, lscale, rscale, &m, vl, &c__50, &info); if (info != 0) { ++ninfo; lmax[1] = knt; } dggbak_("B", "R", &n, &ilo, &ihi, lscale, rscale, &m, vr, &c__50, &info); if (info != 0) { ++ninfo; lmax[2] = knt; } /* Test of DGGBAK Check tilde(VL)'*A*tilde(VR) - VL'*tilde(A)*VR where tilde(A) denotes the transformed matrix. */ dgemm_("N", "N", &n, &m, &n, &c_b52, af, &c__50, vr, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, &c__50); dgemm_("N", "N", &n, &m, &n, &c_b52, a, &c__50, vrf, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f, &c__50); vmax = 0.; i__1 = m; for (j = 1; j <= i__1; ++j) { i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs( d__1)); vmax = max(d__2,d__3); /* L60: */ } /* L70: */ } vmax /= eps * max(anorm,bnorm); if (vmax > rmax) { lmax[3] = knt; rmax = vmax; } /* Check tilde(VL)'*B*tilde(VR) - VL'*tilde(B)*VR */ dgemm_("N", "N", &n, &m, &n, &c_b52, bf, &c__50, vr, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, &c__50); dgemm_("N", "N", &n, &m, &n, &c_b52, b, &c__50, vrf, &c__50, &c_b55, work, &c__50); dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f, &c__50); vmax = 0.; i__1 = m; for (j = 1; j <= i__1; ++j) { i__2 = m; for (i__ = 1; i__ <= i__2; ++i__) { /* Computing MAX */ d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs( d__1)); vmax = max(d__2,d__3); /* L80: */ } /* L90: */ } vmax /= eps * max(anorm,bnorm); if (vmax > rmax) { lmax[3] = knt; rmax = vmax; } goto L10; L100: io___34.ciunit = *nout; s_wsfe(&io___34); e_wsfe(); io___35.ciunit = *nout; s_wsfe(&io___35); do_fio(&c__1, (char *)&rmax, (ftnlen)sizeof(doublereal)); e_wsfe(); io___36.ciunit = *nout; s_wsfe(&io___36); do_fio(&c__1, (char *)&lmax[0], (ftnlen)sizeof(integer)); e_wsfe(); io___37.ciunit = *nout; s_wsfe(&io___37); do_fio(&c__1, (char *)&lmax[1], (ftnlen)sizeof(integer)); e_wsfe(); io___38.ciunit = *nout; s_wsfe(&io___38); do_fio(&c__1, (char *)&lmax[2], (ftnlen)sizeof(integer)); e_wsfe(); io___39.ciunit = *nout; s_wsfe(&io___39); do_fio(&c__1, (char *)&lmax[3], (ftnlen)sizeof(integer)); e_wsfe(); io___40.ciunit = *nout; s_wsfe(&io___40); do_fio(&c__1, (char *)&ninfo, (ftnlen)sizeof(integer)); e_wsfe(); io___41.ciunit = *nout; s_wsfe(&io___41); do_fio(&c__1, (char *)&knt, (ftnlen)sizeof(integer)); e_wsfe(); return 0; /* End of DCHKGK */ } /* dchkgk_ */