Example #1
0
static int run_single_test(const char *test,
                           const uint8_t *block1, ptrdiff_t stride1,
                           const uint8_t *block2, ptrdiff_t stride2,
                           int align, int n)
{
    int out, ref;
    av_pixelutils_sad_fn f_ref = sad_c[n - 1];
    av_pixelutils_sad_fn f_out = av_pixelutils_get_sad_fn(n, n, align, NULL);

    switch (align) {
    case 0: block1++; block2++; break;
    case 1:           block2++; break;
    case 2:                     break;
    }

    out = f_out(block1, stride1, block2, stride2);
    ref = f_ref(block1, stride1, block2, stride2);
    printf("[%s] [%c%c] SAD [%s] %dx%d=%d ref=%d\n",
           out == ref ? "OK" : "FAIL",
           align ? 'A' : 'U', align == 2 ? 'A' : 'U',
           test, 1<<n, 1<<n, out, ref);
    return out != ref;
}
Example #2
0
/* Subroutine */ int ctgsyl_(char *trans, integer *ijob, integer *m, integer *
	n, complex *a, integer *lda, complex *b, integer *ldb, complex *c__, 
	integer *ldc, complex *d__, integer *ldd, complex *e, integer *lde, 
	complex *f, integer *ldf, real *scale, real *dif, complex *work, 
	integer *lwork, integer *iwork, integer *info)
{
/*  -- LAPACK routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    CTGSYL solves the generalized Sylvester equation:   

                A * R - L * B = scale * C            (1)   
                D * R - L * E = scale * F   

    where R and L are unknown m-by-n matrices, (A, D), (B, E) and   
    (C, F) are given matrix pairs of size m-by-m, n-by-n and m-by-n,   
    respectively, with complex entries. A, B, D and E are upper   
    triangular (i.e., (A,D) and (B,E) in generalized Schur form).   

    The solution (R, L) overwrites (C, F). 0 <= SCALE <= 1   
    is an output scaling factor chosen to avoid overflow.   

    In matrix notation (1) is equivalent to solve Zx = scale*b, where Z   
    is defined as   

           Z = [ kron(In, A)  -kron(B', Im) ]        (2)   
               [ kron(In, D)  -kron(E', Im) ],   

    Here Ix is the identity matrix of size x and X' is the conjugate   
    transpose of X. Kron(X, Y) is the Kronecker product between the   
    matrices X and Y.   

    If TRANS = 'C', y in the conjugate transposed system Z'*y = scale*b   
    is solved for, which is equivalent to solve for R and L in   

                A' * R + D' * L = scale * C           (3)   
                R * B' + L * E' = scale * -F   

    This case (TRANS = 'C') is used to compute an one-norm-based estimate   
    of Dif[(A,D), (B,E)], the separation between the matrix pairs (A,D)   
    and (B,E), using CLACON.   

    If IJOB >= 1, CTGSYL computes a Frobenius norm-based estimate of   
    Dif[(A,D),(B,E)]. That is, the reciprocal of a lower bound on the   
    reciprocal of the smallest singular value of Z.   

    This is a level-3 BLAS algorithm.   

    Arguments   
    =========   

    TRANS   (input) CHARACTER*1   
            = 'N': solve the generalized sylvester equation (1).   
            = 'C': solve the "conjugate transposed" system (3).   

    IJOB    (input) INTEGER   
            Specifies what kind of functionality to be performed.   
            =0: solve (1) only.   
            =1: The functionality of 0 and 3.   
            =2: The functionality of 0 and 4.   
            =3: Only an estimate of Dif[(A,D), (B,E)] is computed.   
                (look ahead strategy is used).   
            =4: Only an estimate of Dif[(A,D), (B,E)] is computed.   
                (CGECON on sub-systems is used).   
            Not referenced if TRANS = 'C'.   

    M       (input) INTEGER   
            The order of the matrices A and D, and the row dimension of   
            the matrices C, F, R and L.   

    N       (input) INTEGER   
            The order of the matrices B and E, and the column dimension   
            of the matrices C, F, R and L.   

    A       (input) COMPLEX array, dimension (LDA, M)   
            The upper triangular matrix A.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1, M).   

    B       (input) COMPLEX array, dimension (LDB, N)   
            The upper triangular matrix B.   

    LDB     (input) INTEGER   
            The leading dimension of the array B. LDB >= max(1, N).   

    C       (input/output) COMPLEX array, dimension (LDC, N)   
            On entry, C contains the right-hand-side of the first matrix   
            equation in (1) or (3).   
            On exit, if IJOB = 0, 1 or 2, C has been overwritten by   
            the solution R. If IJOB = 3 or 4 and TRANS = 'N', C holds R,   
            the solution achieved during the computation of the   
            Dif-estimate.   

    LDC     (input) INTEGER   
            The leading dimension of the array C. LDC >= max(1, M).   

    D       (input) COMPLEX array, dimension (LDD, M)   
            The upper triangular matrix D.   

    LDD     (input) INTEGER   
            The leading dimension of the array D. LDD >= max(1, M).   

    E       (input) COMPLEX array, dimension (LDE, N)   
            The upper triangular matrix E.   

    LDE     (input) INTEGER   
            The leading dimension of the array E. LDE >= max(1, N).   

    F       (input/output) COMPLEX array, dimension (LDF, N)   
            On entry, F contains the right-hand-side of the second matrix   
            equation in (1) or (3).   
            On exit, if IJOB = 0, 1 or 2, F has been overwritten by   
            the solution L. If IJOB = 3 or 4 and TRANS = 'N', F holds L,   
            the solution achieved during the computation of the   
            Dif-estimate.   

    LDF     (input) INTEGER   
            The leading dimension of the array F. LDF >= max(1, M).   

    DIF     (output) REAL   
            On exit DIF is the reciprocal of a lower bound of the   
            reciprocal of the Dif-function, i.e. DIF is an upper bound of   
            Dif[(A,D), (B,E)] = sigma-min(Z), where Z as in (2).   
            IF IJOB = 0 or TRANS = 'C', DIF is not referenced.   

    SCALE   (output) REAL   
            On exit SCALE is the scaling factor in (1) or (3).   
            If 0 < SCALE < 1, C and F hold the solutions R and L, resp.,   
            to a slightly perturbed system but the input matrices A, B,   
            D and E have not been changed. If SCALE = 0, R and L will   
            hold the solutions to the homogenious system with C = F = 0.   

    WORK    (workspace/output) COMPLEX array, dimension (LWORK)   
            IF IJOB = 0, WORK is not referenced.  Otherwise,   

    LWORK   (input) INTEGER   
            The dimension of the array WORK. LWORK > = 1.   
            If IJOB = 1 or 2 and TRANS = 'N', LWORK >= 2*M*N.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    IWORK   (workspace) INTEGER array, dimension (M+N+2)   
            If IJOB = 0, IWORK is not referenced.   

    INFO    (output) INTEGER   
              =0: successful exit   
              <0: If INFO = -i, the i-th argument had an illegal value.   
              >0: (A, D) and (B, E) have common or very close   
                  eigenvalues.   

    Further Details   
    ===============   

    Based on contributions by   
       Bo Kagstrom and Peter Poromaa, Department of Computing Science,   
       Umea University, S-901 87 Umea, Sweden.   

    [1] B. Kagstrom and P. Poromaa, LAPACK-Style Algorithms and Software   
        for Solving the Generalized Sylvester Equation and Estimating the   
        Separation between Regular Matrix Pairs, Report UMINF - 93.23,   
        Department of Computing Science, Umea University, S-901 87 Umea,   
        Sweden, December 1993, Revised April 1994, Also as LAPACK Working   
        Note 75.  To appear in ACM Trans. on Math. Software, Vol 22,   
        No 1, 1996.   

    [2] B. Kagstrom, A Perturbation Analysis of the Generalized Sylvester   
        Equation (AR - LB, DR - LE ) = (C, F), SIAM J. Matrix Anal.   
        Appl., 15(4):1045-1060, 1994.   

    [3] B. Kagstrom and L. Westin, Generalized Schur Methods with   
        Condition Estimators for Solving the Generalized Sylvester   
        Equation, IEEE Transactions on Automatic Control, Vol. 34, No. 7,   
        July 1989, pp 745-751.   

    =====================================================================   


       Decode and test input parameters   

       Parameter adjustments */
    /* Table of constant values */
    static integer c__2 = 2;
    static integer c_n1 = -1;
    static integer c__5 = 5;
    static integer c__0 = 0;
    static integer c__1 = 1;
    static complex c_b16 = {0.f,0.f};
    static complex c_b53 = {-1.f,0.f};
    static complex c_b54 = {1.f,0.f};
    
    /* System generated locals */
    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, d_dim1, 
	    d_offset, e_dim1, e_offset, f_dim1, f_offset, i__1, i__2, i__3, 
	    i__4;
    complex q__1;
    /* Builtin functions */
    double sqrt(doublereal);
    /* Local variables */
    static real dsum;
    static integer i__, j, k, p, q;
    extern /* Subroutine */ int cscal_(integer *, complex *, complex *, 
	    integer *), cgemm_(char *, char *, integer *, integer *, integer *
	    , complex *, complex *, integer *, complex *, integer *, complex *
	    , complex *, integer *);
    extern logical lsame_(char *, char *);
    static integer ifunc, linfo;
    extern /* Subroutine */ int ccopy_(integer *, complex *, integer *, 
	    complex *, integer *);
    static integer lwmin;
    static real scale2;
    extern /* Subroutine */ int ctgsy2_(char *, integer *, integer *, integer 
	    *, complex *, integer *, complex *, integer *, complex *, integer 
	    *, complex *, integer *, complex *, integer *, complex *, integer 
	    *, real *, real *, real *, integer *);
    static integer ie, je, mb, nb;
    static real dscale;
    static integer is, js, pq;
    static real scaloc;
    extern /* Subroutine */ int clacpy_(char *, integer *, integer *, complex 
	    *, integer *, complex *, integer *), xerbla_(char *, 
	    integer *);
    extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
	    integer *, integer *, ftnlen, ftnlen);
    static integer iround;
    static logical notran;
    static integer isolve;
    static logical lquery;
#define a_subscr(a_1,a_2) (a_2)*a_dim1 + a_1
#define a_ref(a_1,a_2) a[a_subscr(a_1,a_2)]
#define b_subscr(a_1,a_2) (a_2)*b_dim1 + a_1
#define b_ref(a_1,a_2) b[b_subscr(a_1,a_2)]
#define c___subscr(a_1,a_2) (a_2)*c_dim1 + a_1
#define c___ref(a_1,a_2) c__[c___subscr(a_1,a_2)]
#define d___subscr(a_1,a_2) (a_2)*d_dim1 + a_1
#define d___ref(a_1,a_2) d__[d___subscr(a_1,a_2)]
#define e_subscr(a_1,a_2) (a_2)*e_dim1 + a_1
#define e_ref(a_1,a_2) e[e_subscr(a_1,a_2)]
#define f_subscr(a_1,a_2) (a_2)*f_dim1 + a_1
#define f_ref(a_1,a_2) f[f_subscr(a_1,a_2)]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    b_dim1 = *ldb;
    b_offset = 1 + b_dim1 * 1;
    b -= b_offset;
    c_dim1 = *ldc;
    c_offset = 1 + c_dim1 * 1;
    c__ -= c_offset;
    d_dim1 = *ldd;
    d_offset = 1 + d_dim1 * 1;
    d__ -= d_offset;
    e_dim1 = *lde;
    e_offset = 1 + e_dim1 * 1;
    e -= e_offset;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1 * 1;
    f -= f_offset;
    --work;
    --iwork;

    /* Function Body */
    *info = 0;
    notran = lsame_(trans, "N");
    lquery = *lwork == -1;

    if ((*ijob == 1 || *ijob == 2) && notran) {
/* Computing MAX */
	i__1 = 1, i__2 = (*m << 1) * *n;
	lwmin = max(i__1,i__2);
    } else {
	lwmin = 1;
    }

    if (! notran && ! lsame_(trans, "C")) {
	*info = -1;
    } else if (*ijob < 0 || *ijob > 4) {
	*info = -2;
    } else if (*m <= 0) {
	*info = -3;
    } else if (*n <= 0) {
	*info = -4;
    } else if (*lda < max(1,*m)) {
	*info = -6;
    } else if (*ldb < max(1,*n)) {
	*info = -8;
    } else if (*ldc < max(1,*m)) {
	*info = -10;
    } else if (*ldd < max(1,*m)) {
	*info = -12;
    } else if (*lde < max(1,*n)) {
	*info = -14;
    } else if (*ldf < max(1,*m)) {
	*info = -16;
    } else if (*lwork < lwmin && ! lquery) {
	*info = -20;
    }

    if (*info == 0) {
	work[1].r = (real) lwmin, work[1].i = 0.f;
    }

    if (*info != 0) {
	i__1 = -(*info);
	xerbla_("CTGSYL", &i__1);
	return 0;
    } else if (lquery) {
	return 0;
    }

/*     Determine  optimal block sizes MB and NB */

    mb = ilaenv_(&c__2, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);
    nb = ilaenv_(&c__5, "CTGSYL", trans, m, n, &c_n1, &c_n1, (ftnlen)6, (
	    ftnlen)1);

    isolve = 1;
    ifunc = 0;
    if (*ijob >= 3 && notran) {
	ifunc = *ijob - 2;
	i__1 = *n;
	for (j = 1; j <= i__1; ++j) {
	    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
	    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L10: */
	}
    } else if (*ijob >= 1 && notran) {
	isolve = 2;
    }

    if (mb <= 1 && nb <= 1 || mb >= *m && nb >= *n) {

/*        Use unblocked Level 2 solver */

	i__1 = isolve;
	for (iround = 1; iround <= i__1; ++iround) {

	    *scale = 1.f;
	    dscale = 0.f;
	    dsum = 1.f;
	    pq = *m * *n;
	    ctgsy2_(trans, &ifunc, m, n, &a[a_offset], lda, &b[b_offset], ldb,
		     &c__[c_offset], ldc, &d__[d_offset], ldd, &e[e_offset], 
		    lde, &f[f_offset], ldf, scale, &dsum, &dscale, info);
	    if (dscale != 0.f) {
		if (*ijob == 1 || *ijob == 3) {
		    *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
			    dsum));
		} else {
		    *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
		}
	    }
	    if (isolve == 2 && iround == 1) {
		ifunc = *ijob;
		scale2 = *scale;
		clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
		clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
		    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
		    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L20: */
		}
	    } else if (isolve == 2 && iround == 2) {
		clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
		clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
		*scale = scale2;
	    }
/* L30: */
	}

	return 0;

    }

/*     Determine block structure of A */

    p = 0;
    i__ = 1;
L40:
    if (i__ > *m) {
	goto L50;
    }
    ++p;
    iwork[p] = i__;
    i__ += mb;
    if (i__ >= *m) {
	goto L50;
    }
    goto L40;
L50:
    iwork[p + 1] = *m + 1;
    if (iwork[p] == iwork[p + 1]) {
	--p;
    }

/*     Determine block structure of B */

    q = p + 1;
    j = 1;
L60:
    if (j > *n) {
	goto L70;
    }

    ++q;
    iwork[q] = j;
    j += nb;
    if (j >= *n) {
	goto L70;
    }
    goto L60;

L70:
    iwork[q + 1] = *n + 1;
    if (iwork[q] == iwork[q + 1]) {
	--q;
    }

    if (notran) {
	i__1 = isolve;
	for (iround = 1; iround <= i__1; ++iround) {

/*           Solve (I, J) - subsystem   
                 A(I, I) * R(I, J) - L(I, J) * B(J, J) = C(I, J)   
                 D(I, I) * R(I, J) - L(I, J) * E(J, J) = F(I, J)   
             for I = P, P - 1, ..., 1; J = 1, 2, ..., Q */

	    pq = 0;
	    *scale = 1.f;
	    dscale = 0.f;
	    dsum = 1.f;
	    i__2 = q;
	    for (j = p + 2; j <= i__2; ++j) {
		js = iwork[j];
		je = iwork[j + 1] - 1;
		nb = je - js + 1;
		for (i__ = p; i__ >= 1; --i__) {
		    is = iwork[i__];
		    ie = iwork[i__ + 1] - 1;
		    mb = ie - is + 1;
		    ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, &
			    b_ref(js, js), ldb, &c___ref(is, js), ldc, &
			    d___ref(is, is), ldd, &e_ref(js, js), lde, &f_ref(
			    is, js), ldf, &scaloc, &dsum, &dscale, &linfo);
		    if (linfo > 0) {
			*info = linfo;
		    }
		    pq += mb * nb;
		    if (scaloc != 1.f) {
			i__3 = js - 1;
			for (k = 1; k <= i__3; ++k) {
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &c___ref(1, k), &c__1);
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L80: */
			}
			i__3 = je;
			for (k = js; k <= i__3; ++k) {
			    i__4 = is - 1;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &c___ref(1, k), &c__1);
			    i__4 = is - 1;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &f_ref(1, k), &c__1);
/* L90: */
			}
			i__3 = je;
			for (k = js; k <= i__3; ++k) {
			    i__4 = *m - ie;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1);
			    i__4 = *m - ie;
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1);
/* L100: */
			}
			i__3 = *n;
			for (k = je + 1; k <= i__3; ++k) {
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &c___ref(1, k), &c__1);
			    q__1.r = scaloc, q__1.i = 0.f;
			    cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L110: */
			}
			*scale *= scaloc;
		    }

/*                 Substitute R(I,J) and L(I,J) into remaining equation. */

		    if (i__ > 1) {
			i__3 = is - 1;
			cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &a_ref(1, 
				is), lda, &c___ref(is, js), ldc, &c_b54, &
				c___ref(1, js), ldc);
			i__3 = is - 1;
			cgemm_("N", "N", &i__3, &nb, &mb, &c_b53, &d___ref(1, 
				is), ldd, &c___ref(is, js), ldc, &c_b54, &
				f_ref(1, js), ldf);
		    }
		    if (j < q) {
			i__3 = *n - je;
			cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, 
				js), ldf, &b_ref(js, je + 1), ldb, &c_b54, &
				c___ref(is, je + 1), ldc);
			i__3 = *n - je;
			cgemm_("N", "N", &mb, &i__3, &nb, &c_b54, &f_ref(is, 
				js), ldf, &e_ref(js, je + 1), lde, &c_b54, &
				f_ref(is, je + 1), ldf);
		    }
/* L120: */
		}
/* L130: */
	    }
	    if (dscale != 0.f) {
		if (*ijob == 1 || *ijob == 3) {
		    *dif = sqrt((real) ((*m << 1) * *n)) / (dscale * sqrt(
			    dsum));
		} else {
		    *dif = sqrt((real) pq) / (dscale * sqrt(dsum));
		}
	    }
	    if (isolve == 2 && iround == 1) {
		ifunc = *ijob;
		scale2 = *scale;
		clacpy_("F", m, n, &c__[c_offset], ldc, &work[1], m);
		clacpy_("F", m, n, &f[f_offset], ldf, &work[*m * *n + 1], m);
		i__2 = *n;
		for (j = 1; j <= i__2; ++j) {
		    ccopy_(m, &c_b16, &c__0, &c___ref(1, j), &c__1);
		    ccopy_(m, &c_b16, &c__0, &f_ref(1, j), &c__1);
/* L140: */
		}
	    } else if (isolve == 2 && iround == 2) {
		clacpy_("F", m, n, &work[1], m, &c__[c_offset], ldc);
		clacpy_("F", m, n, &work[*m * *n + 1], m, &f[f_offset], ldf);
		*scale = scale2;
	    }
/* L150: */
	}
    } else {

/*        Solve transposed (I, J)-subsystem   
              A(I, I)' * R(I, J) + D(I, I)' * L(I, J) = C(I, J)   
              R(I, J) * B(J, J)  + L(I, J) * E(J, J) = -F(I, J)   
          for I = 1,2,..., P; J = Q, Q-1,..., 1 */

	*scale = 1.f;
	i__1 = p;
	for (i__ = 1; i__ <= i__1; ++i__) {
	    is = iwork[i__];
	    ie = iwork[i__ + 1] - 1;
	    mb = ie - is + 1;
	    i__2 = p + 2;
	    for (j = q; j >= i__2; --j) {
		js = iwork[j];
		je = iwork[j + 1] - 1;
		nb = je - js + 1;
		ctgsy2_(trans, &ifunc, &mb, &nb, &a_ref(is, is), lda, &b_ref(
			js, js), ldb, &c___ref(is, js), ldc, &d___ref(is, is),
			 ldd, &e_ref(js, js), lde, &f_ref(is, js), ldf, &
			scaloc, &dsum, &dscale, &linfo);
		if (linfo > 0) {
		    *info = linfo;
		}
		if (scaloc != 1.f) {
		    i__3 = js - 1;
		    for (k = 1; k <= i__3; ++k) {
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &c___ref(1, k), &c__1);
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L160: */
		    }
		    i__3 = je;
		    for (k = js; k <= i__3; ++k) {
			i__4 = is - 1;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &c___ref(1, k), &c__1);
			i__4 = is - 1;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &f_ref(1, k), &c__1);
/* L170: */
		    }
		    i__3 = je;
		    for (k = js; k <= i__3; ++k) {
			i__4 = *m - ie;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &c___ref(ie + 1, k), &c__1);
			i__4 = *m - ie;
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(&i__4, &q__1, &f_ref(ie + 1, k), &c__1);
/* L180: */
		    }
		    i__3 = *n;
		    for (k = je + 1; k <= i__3; ++k) {
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &c___ref(1, k), &c__1);
			q__1.r = scaloc, q__1.i = 0.f;
			cscal_(m, &q__1, &f_ref(1, k), &c__1);
/* L190: */
		    }
		    *scale *= scaloc;
		}

/*              Substitute R(I,J) and L(I,J) into remaining equation. */

		if (j > p + 2) {
		    i__3 = js - 1;
		    cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &c___ref(is, js)
			    , ldc, &b_ref(1, js), ldb, &c_b54, &f_ref(is, 1), 
			    ldf);
		    i__3 = js - 1;
		    cgemm_("N", "C", &mb, &i__3, &nb, &c_b54, &f_ref(is, js), 
			    ldf, &e_ref(1, js), lde, &c_b54, &f_ref(is, 1), 
			    ldf);
		}
		if (i__ < p) {
		    i__3 = *m - ie;
		    cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &a_ref(is, ie + 
			    1), lda, &c___ref(is, js), ldc, &c_b54, &c___ref(
			    ie + 1, js), ldc);
		    i__3 = *m - ie;
		    cgemm_("C", "N", &i__3, &nb, &mb, &c_b53, &d___ref(is, ie 
			    + 1), ldd, &f_ref(is, js), ldf, &c_b54, &c___ref(
			    ie + 1, js), ldc);
		}
/* L200: */
	    }
/* L210: */
	}
    }

    work[1].r = (real) lwmin, work[1].i = 0.f;

    return 0;

/*     End of CTGSYL */

} /* ctgsyl_ */
Example #3
0
/* Subroutine */ int slaqps_(integer *m, integer *n, integer *offset, integer 
	*nb, integer *kb, real *a, integer *lda, integer *jpvt, real *tau, 
	real *vn1, real *vn2, real *auxv, real *f, integer *ldf)
{
/*  -- LAPACK auxiliary routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       June 30, 1999   


    Purpose   
    =======   

    SLAQPS computes a step of QR factorization with column pivoting   
    of a real M-by-N matrix A by using Blas-3.  It tries to factorize   
    NB columns from A starting from the row OFFSET+1, and updates all   
    of the matrix with Blas-3 xGEMM.   

    In some cases, due to catastrophic cancellations, it cannot   
    factorize NB columns.  Hence, the actual number of factorized   
    columns is returned in KB.   

    Block A(1:OFFSET,1:N) is accordingly pivoted, but not factorized.   

    Arguments   
    =========   

    M       (input) INTEGER   
            The number of rows of the matrix A. M >= 0.   

    N       (input) INTEGER   
            The number of columns of the matrix A. N >= 0   

    OFFSET  (input) INTEGER   
            The number of rows of A that have been factorized in   
            previous steps.   

    NB      (input) INTEGER   
            The number of columns to factorize.   

    KB      (output) INTEGER   
            The number of columns actually factorized.   

    A       (input/output) REAL array, dimension (LDA,N)   
            On entry, the M-by-N matrix A.   
            On exit, block A(OFFSET+1:M,1:KB) is the triangular   
            factor obtained and block A(1:OFFSET,1:N) has been   
            accordingly pivoted, but no factorized.   
            The rest of the matrix, block A(OFFSET+1:M,KB+1:N) has   
            been updated.   

    LDA     (input) INTEGER   
            The leading dimension of the array A. LDA >= max(1,M).   

    JPVT    (input/output) INTEGER array, dimension (N)   
            JPVT(I) = K <==> Column K of the full matrix A has been   
            permuted into position I in AP.   

    TAU     (output) REAL array, dimension (KB)   
            The scalar factors of the elementary reflectors.   

    VN1     (input/output) REAL array, dimension (N)   
            The vector with the partial column norms.   

    VN2     (input/output) REAL array, dimension (N)   
            The vector with the exact column norms.   

    AUXV    (input/output) REAL array, dimension (NB)   
            Auxiliar vector.   

    F       (input/output) REAL array, dimension (LDF,NB)   
            Matrix F' = L*Y'*A.   

    LDF     (input) INTEGER   
            The leading dimension of the array F. LDF >= max(1,N).   

    Further Details   
    ===============   

    Based on contributions by   
      G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain   
      X. Sun, Computer Science Dept., Duke University, USA   

    =====================================================================   


       Parameter adjustments */
    /* Table of constant values */
    static integer c__1 = 1;
    static real c_b7 = -1.f;
    static real c_b8 = 1.f;
    static real c_b15 = 0.f;
    
    /* System generated locals */
    integer a_dim1, a_offset, f_dim1, f_offset, i__1, i__2;
    real r__1, r__2;
    /* Builtin functions */
    double sqrt(doublereal);
    integer i_nint(real *);
    /* Local variables */
    static real temp, temp2;
    extern doublereal snrm2_(integer *, real *, integer *);
    static integer j, k;
    extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, 
	    integer *, real *, real *, integer *, real *, integer *, real *, 
	    real *, integer *);
    static integer itemp;
    extern /* Subroutine */ int sgemv_(char *, integer *, integer *, real *, 
	    real *, integer *, real *, integer *, real *, real *, integer *), sswap_(integer *, real *, integer *, real *, integer *);
    static integer rk;
    extern /* Subroutine */ int slarfg_(integer *, real *, real *, integer *, 
	    real *);
    static integer lsticc;
    extern integer isamax_(integer *, real *, integer *);
    static integer lastrk;
    static real akk;
    static integer pvt;
#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
#define f_ref(a_1,a_2) f[(a_2)*f_dim1 + a_1]


    a_dim1 = *lda;
    a_offset = 1 + a_dim1 * 1;
    a -= a_offset;
    --jpvt;
    --tau;
    --vn1;
    --vn2;
    --auxv;
    f_dim1 = *ldf;
    f_offset = 1 + f_dim1 * 1;
    f -= f_offset;

    /* Function Body   
   Computing MIN */
    i__1 = *m, i__2 = *n + *offset;
    lastrk = min(i__1,i__2);
    lsticc = 0;
    k = 0;

/*     Beginning of while loop. */

L10:
    if (k < *nb && lsticc == 0) {
	++k;
	rk = *offset + k;

/*        Determine ith pivot column and swap if necessary */

	i__1 = *n - k + 1;
	pvt = k - 1 + isamax_(&i__1, &vn1[k], &c__1);
	if (pvt != k) {
	    sswap_(m, &a_ref(1, pvt), &c__1, &a_ref(1, k), &c__1);
	    i__1 = k - 1;
	    sswap_(&i__1, &f_ref(pvt, 1), ldf, &f_ref(k, 1), ldf);
	    itemp = jpvt[pvt];
	    jpvt[pvt] = jpvt[k];
	    jpvt[k] = itemp;
	    vn1[pvt] = vn1[k];
	    vn2[pvt] = vn2[k];
	}

/*        Apply previous Householder reflectors to column K:   
          A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)'. */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    sgemv_("No transpose", &i__1, &i__2, &c_b7, &a_ref(rk, 1), lda, &
		    f_ref(k, 1), ldf, &c_b8, &a_ref(rk, k), &c__1)
		    ;
	}

/*        Generate elementary reflector H(k). */

	if (rk < *m) {
	    i__1 = *m - rk + 1;
	    slarfg_(&i__1, &a_ref(rk, k), &a_ref(rk + 1, k), &c__1, &tau[k]);
	} else {
	    slarfg_(&c__1, &a_ref(rk, k), &a_ref(rk, k), &c__1, &tau[k]);
	}

	akk = a_ref(rk, k);
	a_ref(rk, k) = 1.f;

/*        Compute Kth column of F:   

          Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)'*A(RK:M,K). */

	if (k < *n) {
	    i__1 = *m - rk + 1;
	    i__2 = *n - k;
	    sgemv_("Transpose", &i__1, &i__2, &tau[k], &a_ref(rk, k + 1), lda,
		     &a_ref(rk, k), &c__1, &c_b15, &f_ref(k + 1, k), &c__1);
	}

/*        Padding F(1:K,K) with zeros. */

	i__1 = k;
	for (j = 1; j <= i__1; ++j) {
	    f_ref(j, k) = 0.f;
/* L20: */
	}

/*        Incremental updating of F:   
          F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)'   
                      *A(RK:M,K). */

	if (k > 1) {
	    i__1 = *m - rk + 1;
	    i__2 = k - 1;
	    r__1 = -tau[k];
	    sgemv_("Transpose", &i__1, &i__2, &r__1, &a_ref(rk, 1), lda, &
		    a_ref(rk, k), &c__1, &c_b15, &auxv[1], &c__1);

	    i__1 = k - 1;
	    sgemv_("No transpose", n, &i__1, &c_b8, &f_ref(1, 1), ldf, &auxv[
		    1], &c__1, &c_b8, &f_ref(1, k), &c__1);
	}

/*        Update the current row of A:   
          A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)'. */

	if (k < *n) {
	    i__1 = *n - k;
	    sgemv_("No transpose", &i__1, &k, &c_b7, &f_ref(k + 1, 1), ldf, &
		    a_ref(rk, 1), lda, &c_b8, &a_ref(rk, k + 1), lda);
	}

/*        Update partial column norms. */

	if (rk < lastrk) {
	    i__1 = *n;
	    for (j = k + 1; j <= i__1; ++j) {
		if (vn1[j] != 0.f) {
		    temp = (r__1 = a_ref(rk, j), dabs(r__1)) / vn1[j];
/* Computing MAX */
		    r__1 = 0.f, r__2 = (temp + 1.f) * (1.f - temp);
		    temp = dmax(r__1,r__2);
/* Computing 2nd power */
		    r__1 = vn1[j] / vn2[j];
		    temp2 = temp * .05f * (r__1 * r__1) + 1.f;
		    if (temp2 == 1.f) {
			vn2[j] = (real) lsticc;
			lsticc = j;
		    } else {
			vn1[j] *= sqrt(temp);
		    }
		}
/* L30: */
	    }
	}

	a_ref(rk, k) = akk;

/*        End of while loop. */

	goto L10;
    }
    *kb = k;
    rk = *offset + *kb;

/*     Apply the block reflector to the rest of the matrix:   
       A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -   
                           A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)'.   

   Computing MIN */
    i__1 = *n, i__2 = *m - *offset;
    if (*kb < min(i__1,i__2)) {
	i__1 = *m - rk;
	i__2 = *n - *kb;
	sgemm_("No transpose", "Transpose", &i__1, &i__2, kb, &c_b7, &a_ref(
		rk + 1, 1), lda, &f_ref(*kb + 1, 1), ldf, &c_b8, &a_ref(rk + 
		1, *kb + 1), lda);
    }

/*     Recomputation of difficult columns. */

L40:
    if (lsticc > 0) {
	itemp = i_nint(&vn2[lsticc]);
	i__1 = *m - rk;
	vn1[lsticc] = snrm2_(&i__1, &a_ref(rk + 1, lsticc), &c__1);
	vn2[lsticc] = vn1[lsticc];
	lsticc = itemp;
	goto L40;
    }

    return 0;

/*     End of SLAQPS */

} /* slaqps_ */
Example #4
0
/* Subroutine */ int dchkgk_(integer *nin, integer *nout)
{
    /* Format strings */
    static char fmt_9999[] = "(1x,\002.. test output of DGGBAK .. \002)";
    static char fmt_9998[] = "(\002 value of largest test error             "
	    "     =\002,d12.3)";
    static char fmt_9997[] = "(\002 example number where DGGBAL info is not "
	    "0    =\002,i4)";
    static char fmt_9996[] = "(\002 example number where DGGBAK(L) info is n"
	    "ot 0 =\002,i4)";
    static char fmt_9995[] = "(\002 example number where DGGBAK(R) info is n"
	    "ot 0 =\002,i4)";
    static char fmt_9994[] = "(\002 example number having largest error     "
	    "     =\002,i4)";
    static char fmt_9993[] = "(\002 number of examples where info is not 0  "
	    "     =\002,i4)";
    static char fmt_9992[] = "(\002 total number of examples tested         "
	    "     =\002,i4)";

    /* System generated locals */
    integer i__1, i__2;
    doublereal d__1, d__2, d__3;

    /* Builtin functions */
    integer s_rsle(cilist *), do_lio(integer *, integer *, char *, ftnlen), 
	    e_rsle(void), s_wsfe(cilist *), e_wsfe(void), do_fio(integer *, 
	    char *, ftnlen);

    /* Local variables */
    static integer info, lmax[4];
    static doublereal rmax, vmax, work[2500]	/* was [50][50] */, a[2500]	
	    /* was [50][50] */, b[2500]	/* was [50][50] */, e[2500]	/* 
	    was [50][50] */, f[2500]	/* was [50][50] */;
    static integer i__, j, m, n;
    extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, doublereal *, doublereal *, integer *);
    static integer ninfo;
    static doublereal anorm, bnorm, af[2500]	/* was [50][50] */, bf[2500]	
	    /* was [50][50] */;
    extern /* Subroutine */ int dggbak_(char *, char *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, integer *, doublereal *, 
	    integer *, integer *), dggbal_(char *, integer *, 
	    doublereal *, integer *, doublereal *, integer *, integer *, 
	    integer *, doublereal *, doublereal *, doublereal *, integer *);
    extern doublereal dlamch_(char *), dlange_(char *, integer *, 
	    integer *, doublereal *, integer *, doublereal *);
    static doublereal vl[2500]	/* was [50][50] */, lscale[50], vr[2500]	
	    /* was [50][50] */, rscale[50];
    extern /* Subroutine */ int dlacpy_(char *, integer *, integer *, 
	    doublereal *, integer *, doublereal *, integer *);
    static integer ihi, ilo;
    static doublereal eps, vlf[2500]	/* was [50][50] */;
    static integer knt;
    static doublereal vrf[2500]	/* was [50][50] */;

    /* Fortran I/O blocks */
    static cilist io___6 = { 0, 0, 0, 0, 0 };
    static cilist io___10 = { 0, 0, 0, 0, 0 };
    static cilist io___13 = { 0, 0, 0, 0, 0 };
    static cilist io___15 = { 0, 0, 0, 0, 0 };
    static cilist io___17 = { 0, 0, 0, 0, 0 };
    static cilist io___34 = { 0, 0, 0, fmt_9999, 0 };
    static cilist io___35 = { 0, 0, 0, fmt_9998, 0 };
    static cilist io___36 = { 0, 0, 0, fmt_9997, 0 };
    static cilist io___37 = { 0, 0, 0, fmt_9996, 0 };
    static cilist io___38 = { 0, 0, 0, fmt_9995, 0 };
    static cilist io___39 = { 0, 0, 0, fmt_9994, 0 };
    static cilist io___40 = { 0, 0, 0, fmt_9993, 0 };
    static cilist io___41 = { 0, 0, 0, fmt_9992, 0 };



#define a_ref(a_1,a_2) a[(a_2)*50 + a_1 - 51]
#define b_ref(a_1,a_2) b[(a_2)*50 + a_1 - 51]
#define e_ref(a_1,a_2) e[(a_2)*50 + a_1 - 51]
#define f_ref(a_1,a_2) f[(a_2)*50 + a_1 - 51]
#define vl_ref(a_1,a_2) vl[(a_2)*50 + a_1 - 51]
#define vr_ref(a_1,a_2) vr[(a_2)*50 + a_1 - 51]


/*  -- LAPACK test routine (version 3.0) --   
       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
       Courant Institute, Argonne National Lab, and Rice University   
       September 30, 1994   


    Purpose   
    =======   

    DCHKGK tests DGGBAK, a routine for backward balancing  of   
    a matrix pair (A, B).   

    Arguments   
    =========   

    NIN     (input) INTEGER   
            The logical unit number for input.  NIN > 0.   

    NOUT    (input) INTEGER   
            The logical unit number for output.  NOUT > 0.   

    =====================================================================   


       Initialization */

    lmax[0] = 0;
    lmax[1] = 0;
    lmax[2] = 0;
    lmax[3] = 0;
    ninfo = 0;
    knt = 0;
    rmax = 0.;

    eps = dlamch_("Precision");

L10:
    io___6.ciunit = *nin;
    s_rsle(&io___6);
    do_lio(&c__3, &c__1, (char *)&n, (ftnlen)sizeof(integer));
    do_lio(&c__3, &c__1, (char *)&m, (ftnlen)sizeof(integer));
    e_rsle();
    if (n == 0) {
	goto L100;
    }

    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___10.ciunit = *nin;
	s_rsle(&io___10);
	i__2 = n;
	for (j = 1; j <= i__2; ++j) {
	    do_lio(&c__5, &c__1, (char *)&a_ref(i__, j), (ftnlen)sizeof(
		    doublereal));
	}
	e_rsle();
/* L20: */
    }

    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___13.ciunit = *nin;
	s_rsle(&io___13);
	i__2 = n;
	for (j = 1; j <= i__2; ++j) {
	    do_lio(&c__5, &c__1, (char *)&b_ref(i__, j), (ftnlen)sizeof(
		    doublereal));
	}
	e_rsle();
/* L30: */
    }

    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___15.ciunit = *nin;
	s_rsle(&io___15);
	i__2 = m;
	for (j = 1; j <= i__2; ++j) {
	    do_lio(&c__5, &c__1, (char *)&vl_ref(i__, j), (ftnlen)sizeof(
		    doublereal));
	}
	e_rsle();
/* L40: */
    }

    i__1 = n;
    for (i__ = 1; i__ <= i__1; ++i__) {
	io___17.ciunit = *nin;
	s_rsle(&io___17);
	i__2 = m;
	for (j = 1; j <= i__2; ++j) {
	    do_lio(&c__5, &c__1, (char *)&vr_ref(i__, j), (ftnlen)sizeof(
		    doublereal));
	}
	e_rsle();
/* L50: */
    }

    ++knt;

    anorm = dlange_("M", &n, &n, a, &c__50, work);
    bnorm = dlange_("M", &n, &n, b, &c__50, work);

    dlacpy_("FULL", &n, &n, a, &c__50, af, &c__50);
    dlacpy_("FULL", &n, &n, b, &c__50, bf, &c__50);

    dggbal_("B", &n, a, &c__50, b, &c__50, &ilo, &ihi, lscale, rscale, work, &
	    info);
    if (info != 0) {
	++ninfo;
	lmax[0] = knt;
    }

    dlacpy_("FULL", &n, &m, vl, &c__50, vlf, &c__50);
    dlacpy_("FULL", &n, &m, vr, &c__50, vrf, &c__50);

    dggbak_("B", "L", &n, &ilo, &ihi, lscale, rscale, &m, vl, &c__50, &info);
    if (info != 0) {
	++ninfo;
	lmax[1] = knt;
    }

    dggbak_("B", "R", &n, &ilo, &ihi, lscale, rscale, &m, vr, &c__50, &info);
    if (info != 0) {
	++ninfo;
	lmax[2] = knt;
    }

/*     Test of DGGBAK   

       Check tilde(VL)'*A*tilde(VR) - VL'*tilde(A)*VR   
       where tilde(A) denotes the transformed matrix. */

    dgemm_("N", "N", &n, &m, &n, &c_b52, af, &c__50, vr, &c__50, &c_b55, work,
	     &c__50);
    dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, 
	    &c__50);

    dgemm_("N", "N", &n, &m, &n, &c_b52, a, &c__50, vrf, &c__50, &c_b55, work,
	     &c__50);
    dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f,
	     &c__50);

    vmax = 0.;
    i__1 = m;
    for (j = 1; j <= i__1; ++j) {
	i__2 = m;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs(
		    d__1));
	    vmax = max(d__2,d__3);
/* L60: */
	}
/* L70: */
    }
    vmax /= eps * max(anorm,bnorm);
    if (vmax > rmax) {
	lmax[3] = knt;
	rmax = vmax;
    }

/*     Check tilde(VL)'*B*tilde(VR) - VL'*tilde(B)*VR */

    dgemm_("N", "N", &n, &m, &n, &c_b52, bf, &c__50, vr, &c__50, &c_b55, work,
	     &c__50);
    dgemm_("T", "N", &m, &m, &n, &c_b52, vl, &c__50, work, &c__50, &c_b55, e, 
	    &c__50);

    dgemm_("N", "N", &n, &m, &n, &c_b52, b, &c__50, vrf, &c__50, &c_b55, work,
	     &c__50);
    dgemm_("T", "N", &m, &m, &n, &c_b52, vlf, &c__50, work, &c__50, &c_b55, f,
	     &c__50);

    vmax = 0.;
    i__1 = m;
    for (j = 1; j <= i__1; ++j) {
	i__2 = m;
	for (i__ = 1; i__ <= i__2; ++i__) {
/* Computing MAX */
	    d__2 = vmax, d__3 = (d__1 = e_ref(i__, j) - f_ref(i__, j), abs(
		    d__1));
	    vmax = max(d__2,d__3);
/* L80: */
	}
/* L90: */
    }
    vmax /= eps * max(anorm,bnorm);
    if (vmax > rmax) {
	lmax[3] = knt;
	rmax = vmax;
    }

    goto L10;

L100:

    io___34.ciunit = *nout;
    s_wsfe(&io___34);
    e_wsfe();

    io___35.ciunit = *nout;
    s_wsfe(&io___35);
    do_fio(&c__1, (char *)&rmax, (ftnlen)sizeof(doublereal));
    e_wsfe();
    io___36.ciunit = *nout;
    s_wsfe(&io___36);
    do_fio(&c__1, (char *)&lmax[0], (ftnlen)sizeof(integer));
    e_wsfe();
    io___37.ciunit = *nout;
    s_wsfe(&io___37);
    do_fio(&c__1, (char *)&lmax[1], (ftnlen)sizeof(integer));
    e_wsfe();
    io___38.ciunit = *nout;
    s_wsfe(&io___38);
    do_fio(&c__1, (char *)&lmax[2], (ftnlen)sizeof(integer));
    e_wsfe();
    io___39.ciunit = *nout;
    s_wsfe(&io___39);
    do_fio(&c__1, (char *)&lmax[3], (ftnlen)sizeof(integer));
    e_wsfe();
    io___40.ciunit = *nout;
    s_wsfe(&io___40);
    do_fio(&c__1, (char *)&ninfo, (ftnlen)sizeof(integer));
    e_wsfe();
    io___41.ciunit = *nout;
    s_wsfe(&io___41);
    do_fio(&c__1, (char *)&knt, (ftnlen)sizeof(integer));
    e_wsfe();

    return 0;

/*     End of DCHKGK */

} /* dchkgk_ */