示例#1
0
void magmaf_dgehrd(
    magma_int_t *n, magma_int_t *ilo, magma_int_t *ihi,
    double *A, magma_int_t *lda,
    double *tau,
    double *work, magma_int_t *lwork,
    devptr_t *dT,
    magma_int_t *info )
{
    magma_dgehrd(
        *n, *ilo, *ihi,
        A, *lda,
        tau,
        work, *lwork,
        magma_ddevptr(dT),
        info );
}
示例#2
0
extern "C" magma_int_t
magma_dgeev(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    double *a, magma_int_t lda,
    double *WR, double *WI,
    double *vl, magma_int_t ldvl,
    double *vr, magma_int_t ldvr,
    double *work, magma_int_t lwork,
    magma_queue_t queue,
    magma_int_t *info)
{
/*  -- clMAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       @date November 2014

    Purpose
    =======
    DGEEV computes for an N-by-N real nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**T * A = lambda(j) * u(j)**T
    where u(j)**T denotes the transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    =========
    JOBVL   (input) CHARACTER*1
            = 'N': left eigenvectors of A are not computed;
            = 'V': left eigenvectors of are computed.

    JOBVR   (input) CHARACTER*1
            = 'N': right eigenvectors of A are not computed;
            = 'V': right eigenvectors of A are computed.

    N       (input) INTEGER
            The order of the matrix A. N >= 0.

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    WR      (output) DOUBLE PRECISION array, dimension (N)
    WI      (output) DOUBLE PRECISION array, dimension (N)
            WR and WI contain the real and imaginary parts,
            respectively, of the computed eigenvalues.  Complex
            conjugate pairs of eigenvalues appear consecutively
            with the eigenvalue having the positive imaginary part
            first.

    VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
            If JOBVL = 'V', the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = 'N', VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    LDVL    (input) INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = 'V', LDVL >= N.

    VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
            If JOBVR = 'V', the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = 'N', VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    LDVR    (input) INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = 'V', LDVR >= N.

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= (1+nb)*N.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value.
            > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.
    =====================================================================    */

    magma_int_t ione = 1;
    magma_int_t c__1 = 1;
    magma_int_t c__0 = 0;
    magma_int_t c_n1 = -1;
    
    magma_int_t a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1,
            i__2, i__3;
    double d__1, d__2;

    magma_int_t i__, k, ihi, ilo;
    double      r__, cs, sn, scl;
    double dum[1], eps;
    magma_int_t ibal;
    double anrm;
    magma_int_t ierr, itau, iwrk, nout;
    magma_int_t scalea;
    double cscale;
    double bignum;
    magma_int_t minwrk;
    magma_int_t wantvl;
    double smlnum;
    magma_int_t lquery, wantvr, select[1];

    magma_int_t nb = 0;
    magmaDouble_ptr dT;
    //magma_timestr_t start, end;

    const char* side_ = NULL;

    *info = 0;
    lquery = lwork == -1;
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -9;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -11;
    }

    /*  Compute workspace   */
    if (*info == 0) {

        nb = magma_get_dgehrd_nb(n);
        minwrk = (2+nb)*n;
        work[0] = (double) minwrk;
        
        if (lwork < minwrk && ! lquery) {
            *info = -13;
        }

    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
   
    // if eigenvectors are needed
#if defined(VERSION3)
    if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
#endif

    // subtract row and col for 1-based indexing
    a_dim1   = lda;
    a_offset = 1 + a_dim1;
    a       -= a_offset;
    vl_dim1   = ldvl;
    vl_offset = 1 + vl_dim1;
    vl       -= vl_offset;
    vr_dim1   = ldvr;
    vr_offset = 1 + vr_dim1;
    vr       -= vr_offset;
    --work;

    /* Get machine constants */
    eps    = lapackf77_dlamch("P");
    smlnum = lapackf77_dlamch("S");
    bignum = 1. / smlnum;
    lapackf77_dlabad(&smlnum, &bignum);
    smlnum = magma_dsqrt(smlnum) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_dlange("M", &n, &n, &a[a_offset], &lda, dum);
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_dlascl("G", &c__0, &c__0, &anrm, &cscale, &n, &n,
                &a[a_offset], &lda, &ierr);
    }

    /* Balance the matrix
       (Workspace: need N) */
    ibal = 1;
    lapackf77_dgebal("B", &n, &a[a_offset], &lda, &ilo, &ihi, &work[ibal], &ierr);

    /* Reduce to upper Hessenberg form
       (Workspace: need 3*N, prefer 2*N+N*NB) */
    itau = ibal + n;
    iwrk = itau + n;
    i__1 = lwork - iwrk + 1;

    //start = get_current_time();
#if defined(VERSION1)
    /*
     * Version 1 - LAPACK
     */
    lapackf77_dgehrd(&n, &ilo, &ihi, &a[a_offset], &lda,
                     &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION2)
    /*
     *  Version 2 - LAPACK consistent HRD
     */
    magma_dgehrd2(n, ilo, ihi, &a[a_offset], lda,
                  &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION3)
    /*
     * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
     */
    magma_dgehrd(n, ilo, ihi, &a[a_offset], lda,
                 &work[itau], &work[iwrk], i__1, dT, 0, queue, &ierr);
#endif
    //end = get_current_time();
    //printf("    Time for dgehrd = %5.2f sec\n", GetTimerValue(start,end)/1000.);

    if (wantvl) {
      /*        Want left eigenvectors
                Copy Householder vectors to VL */
        side_ = "Left";
        lapackf77_dlacpy(MagmaLowerStr, &n, &n,
                         &a[a_offset], &lda, &vl[vl_offset], &ldvl);

        /*
         * Generate orthogonal matrix in VL
         *   (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
         */
        i__1 = lwork - iwrk + 1;

        //start = get_current_time();
#if defined(VERSION1) || defined(VERSION2)
        /*
         * Version 1 & 2 - LAPACK
         */
        lapackf77_dorghr(&n, &ilo, &ihi, &vl[vl_offset], &ldvl,
                         &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION3)
        /*
         * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
         */
        magma_dorghr(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau],
                     dT, 0, nb, queue, &ierr);
#endif
        //end = get_current_time();
        //printf("    Time for dorghr = %5.2f sec\n", GetTimerValue(start,end)/1000.);

        /*
         * Perform QR iteration, accumulating Schur vectors in VL
         *   (Workspace: need N+1, prefer N+HSWORK (see comments) )
         */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_dhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI,
                         &vl[vl_offset], &ldvl, &work[iwrk], &i__1, info);

        if (wantvr) {
          /* Want left and right eigenvectors
             Copy Schur vectors to VR */
            side_ = "Both";
            lapackf77_dlacpy("F", &n, &n, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr);
        }

    } else if (wantvr) {
        /*  Want right eigenvectors
            Copy Householder vectors to VR */
        side_ = "Right";
        lapackf77_dlacpy("L", &n, &n, &a[a_offset], &lda, &vr[vr_offset], &ldvr);

        /*
         * Generate orthogonal matrix in VR
         *   (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
         */
        i__1 = lwork - iwrk + 1;
        //start = get_current_time();
#if defined(VERSION1) || defined(VERSION2)
        /*
         * Version 1 & 2 - LAPACK
         */
        lapackf77_dorghr(&n, &ilo, &ihi, &vr[vr_offset], &ldvr,
                         &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION3)
        /*
         * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
         */
        magma_dorghr(n, ilo, ihi, &vr[vr_offset], ldvr,
                     &work[itau], dT, 0, nb, queue, &ierr);
#endif
        //end = get_current_time();
        //printf("    Time for dorghr = %5.2f sec\n", GetTimerValue(start,end)/1000.);

        /*
         * Perform QR iteration, accumulating Schur vectors in VR
         *   (Workspace: need N+1, prefer N+HSWORK (see comments) )
         */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_dhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI,
                &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info);
    } else {
        /*
         * Compute eigenvalues only
         *   (Workspace: need N+1, prefer N+HSWORK (see comments) )
         */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_dhseqr("E", "N", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI,
                &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info);
    }

    /* If INFO > 0 from DHSEQR, then quit */
    if (*info > 0) {
        fprintf(stderr, "DHSEQR returned with info = %d\n", (int) *info);
        goto L50;
    }

    if (wantvl || wantvr) {
        /*
         * Compute left and/or right eigenvectors
         *   (Workspace: need 4*N)
         */
        lapackf77_dtrevc(side_, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl,
                &vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &ierr);
    }

    if (wantvl) {
        /*
         * Undo balancing of left eigenvectors
         *   (Workspace: need N)
         */
        lapackf77_dgebak("B", "L", &n, &ilo, &ihi,
                         &work[ibal], &n, &vl[vl_offset], &ldvl, &ierr);

        /* Normalize left eigenvectors and make largest component real */
        for (i__ = 1; i__ <= n; ++i__) {
            if ( WI[i__-1] == 0.) {
                scl = magma_cblas_dnrm2(n, &vl[i__ * vl_dim1 + 1], 1);
                scl = 1. / scl;
                blasf77_dscal( &n, &scl, &vl[i__ * vl_dim1 + 1], &ione );
            } else if (WI[i__-1] > 0.) {
                d__1 = magma_cblas_dnrm2(n, &vl[ i__      * vl_dim1 + 1], 1);
                d__2 = magma_cblas_dnrm2(n, &vl[(i__ + 1) * vl_dim1 + 1], 1);
                scl = lapackf77_dlapy2(&d__1, &d__2);
                scl = 1. / scl;
                blasf77_dscal( &n, &scl, &vl[ i__      * vl_dim1 + 1], &ione );
                blasf77_dscal( &n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &ione );
                i__2 = n;
                for (k = 1; k <= i__2; ++k) {
                    /* Computing 2nd power */
                    d__1 = vl[k + i__ * vl_dim1];
                    /* Computing 2nd power */
                    d__2 = vl[k + (i__ + 1) * vl_dim1];
                    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;
                }
                /* Comment:
                   Fortran BLAS does not have to add 1
                   C       BLAS must add one to cblas_idamax */
                k = blasf77_idamax( &n, &work[iwrk], &ione );  //+1;
                lapackf77_dlartg(&vl[k +  i__      * vl_dim1],
                                 &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__);
                blasf77_drot( &n, &vl[ i__      * vl_dim1 + 1], &ione,
                                  &vl[(i__ + 1) * vl_dim1 + 1], &ione, &cs, &sn );
                vl[k + (i__ + 1) * vl_dim1] = 0.;
            }
        }
    }

    if (wantvr) {
        /*
         * Undo balancing of right eigenvectors
         *   (Workspace: need N)
         */
        lapackf77_dgebak("B", "R", &n, &ilo, &ihi, &work[ibal], &n,
                         &vr[vr_offset], &ldvr, &ierr);

        /* Normalize right eigenvectors and make largest component real */
        for (i__ = 1; i__ <= n; ++i__) {
            if (WI[i__-1] == 0.) {
                scl = 1. / magma_cblas_dnrm2(n, &vr[i__ * vr_dim1 + 1], 1);
                blasf77_dscal( &n, &scl, &vr[i__ * vr_dim1 + 1], &ione );
            } else if (WI[i__-1] > 0.) {
                d__1 = magma_cblas_dnrm2(n, &vr[ i__      * vr_dim1 + 1], 1);
                d__2 = magma_cblas_dnrm2(n, &vr[(i__ + 1) * vr_dim1 + 1], 1);
                scl = lapackf77_dlapy2(&d__1, &d__2);
                scl = 1. / scl;
                blasf77_dscal( &n, &scl, &vr[ i__      * vr_dim1 + 1], &ione );
                blasf77_dscal( &n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &ione );
                i__2 = n;
                for (k = 1; k <= i__2; ++k) {
                    /* Computing 2nd power */
                    d__1 = vr[k + i__ * vr_dim1];
                    /* Computing 2nd power */
                    d__2 = vr[k + (i__ + 1) * vr_dim1];
                    work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2;
                }
                /* Comment:
                   Fortran BLAS does not have to add 1
                   C       BLAS must add one to cblas_idamax */
                k = blasf77_idamax( &n, &work[iwrk], &ione );  //+1;
                lapackf77_dlartg(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1],
                        &cs, &sn, &r__);
                blasf77_drot( &n, &vr[ i__      * vr_dim1 + 1], &ione,
                                  &vr[(i__ + 1) * vr_dim1 + 1], &ione, &cs, &sn );
                vr[k + (i__ + 1) * vr_dim1] = 0.;
            }
        }
    }

    /*  Undo scaling if necessary */
L50:
    if (scalea) {
        i__1 = n - *info;
        /* Computing MAX */
        i__3 = n - *info;
        i__2 = max(i__3,1);
        lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1,
                         WR + (*info), &i__2, &ierr);
        i__1 = n - *info;
        /* Computing MAX */
        i__3 = n - *info;
        i__2 = max(i__3,1);
        lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1,
                WI + (*info), &i__2, &ierr);
        if (*info > 0) {
            i__1 = ilo - 1;
            lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1,
                    WR, &n, &ierr);
            i__1 = ilo - 1;
            lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1,
                    WI, &n, &ierr);
        }
    }

#if defined(VERSION3)
    magma_free( dT );
#endif
    return *info;
} /* magma_dgeev */
示例#3
0
extern "C" magma_int_t
magma_dgeev(
    char jobvl, char jobvr, magma_int_t n,
    double *A, magma_int_t lda,
    double *WR, double *WI,
    double *vl, magma_int_t ldvl,
    double *vr, magma_int_t ldvr,
    double *work, magma_int_t lwork,
    magma_int_t *info )
{
/*  -- MAGMA (version 1.4.1) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       December 2013

    Purpose
    =======
    DGEEV computes for an N-by-N real nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**T * A = lambda(j) * u(j)**T
    where u(j)**T denotes the transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    =========
    JOBVL   (input) CHARACTER*1
            = 'N': left eigenvectors of A are not computed;
            = 'V': left eigenvectors of are computed.

    JOBVR   (input) CHARACTER*1
            = 'N': right eigenvectors of A are not computed;
            = 'V': right eigenvectors of A are computed.

    N       (input) INTEGER
            The order of the matrix A. N >= 0.

    A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    WR      (output) DOUBLE PRECISION array, dimension (N)
    WI      (output) DOUBLE PRECISION array, dimension (N)
            WR and WI contain the real and imaginary parts,
            respectively, of the computed eigenvalues.  Complex
            conjugate pairs of eigenvalues appear consecutively
            with the eigenvalue having the positive imaginary part
            first.

    VL      (output) DOUBLE PRECISION array, dimension (LDVL,N)
            If JOBVL = 'V', the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = 'N', VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    LDVL    (input) INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = 'V', LDVL >= N.

    VR      (output) DOUBLE PRECISION array, dimension (LDVR,N)
            If JOBVR = 'V', the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = 'N', VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    LDVR    (input) INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = 'V', LDVR >= N.

    WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= (2+nb)*N.
            For optimal performance, LWORK >= (2+2*nb)*N.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value.
            > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.
    =====================================================================    */

    #define vl(i,j)  (vl + (i) + (j)*ldvl)
    #define vr(i,j)  (vr + (i) + (j)*ldvr)
    
    magma_int_t c_one = 1;
    magma_int_t c_zero = 0;
    
    double d__1, d__2;
    double r, cs, sn, scl;
    double dum[1], eps;
    double anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
    magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1];

    char side[2]   = {0, 0};
    char jobvl_[2] = {jobvl, 0};
    char jobvr_[2] = {jobvr, 0};

    *info = 0;
    lquery = lwork == -1;
    wantvl = lapackf77_lsame( jobvl_, "V" );
    wantvr = lapackf77_lsame( jobvr_, "V" );
    if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
        *info = -1;
    } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -9;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -11;
    }

    /* Compute workspace */
    nb = magma_get_dgehrd_nb( n );
    if (*info == 0) {
        minwrk = (2+nb)*n;
        work[0] = MAGMA_D_MAKE( (double) minwrk, 0. );
        
        if (lwork < minwrk && ! lquery) {
            *info = -13;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
    
    #if defined(VERSION3)
    double *dT;
    if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_dlamch( "P" );
    smlnum = lapackf77_dlamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_dlabad( &smlnum, &bignum );
    smlnum = magma_dsqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_dlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (Workspace: need N) */
    ibal = 0;
    lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (Workspace: need 3*N, prefer 2*N + N*NB) */
    itau = ibal + n;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    #if defined(VERSION1)
        // Version 1 - LAPACK
        lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(VERSION2)
        // Version 2 - LAPACK consistent HRD
        magma_dgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(VERSION3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_dgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #endif

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side[0] = 'L';
        lapackf77_dlacpy( MagmaLowerStr, &n, &n,
                          A, &lda, vl, &ldvl );

        /* Generate orthogonal matrix in VL
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
        #if defined(VERSION1) || defined(VERSION2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(VERSION3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VL
         * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
                          vl, &ldvl, &work[iwrk], &liwrk, info );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side[0] = 'B';
            lapackf77_dlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side[0] = 'R';
        lapackf77_dlacpy( "L", &n, &n, A, &lda, vr, &ldvr );

        /* Generate orthogonal matrix in VR
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */
        #if defined(VERSION1) || defined(VERSION2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(VERSION3)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI,
                          vr, &ldvr, &work[iwrk], &liwrk, info );
    }
    else {
        /* Compute eigenvalues only
         * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI,
                          vr, &ldvr, &work[iwrk], &liwrk, info );
    }

    /* If INFO > 0 from DHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (Workspace: need 4*N) */
        liwrk = lwork - iwrk;
        #if TREVC_VERSION == 1
        lapackf77_dtrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
                          vr, &ldvr, &n, &nout, &work[iwrk], &ierr );
        #elif TREVC_VERSION == 2
        lapackf77_dtrevc3( side, "B", select, &n, A, &lda, vl, &ldvl,
                           vr, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr );
        #endif
    }

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
                          vl, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( WI[i] == 0. ) {
                scl = 1. / cblas_dnrm2( n, vl(0,i), 1 );
                cblas_dscal( n, scl, vl(0,i), 1 );
            }
            else if ( WI[i] > 0. ) {
                d__1 = cblas_dnrm2( n, vl(0,i),   1 );
                d__2 = cblas_dnrm2( n, vl(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                cblas_dscal( n, scl, vl(0,i),   1 );
                cblas_dscal( n, scl, vl(0,i+1), 1 );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *vl(k,i);
                    d__2 = *vl(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = cblas_idamax( n, &work[iwrk], 1 );
                lapackf77_dlartg( vl(k,i), vl(k,i+1), &cs, &sn, &r );
                cblas_drot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn );
                *vl(k,i+1) = 0.;
            }
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
                          vr, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( WI[i] == 0. ) {
                scl = 1. / cblas_dnrm2( n, vr(0,i), 1 );
                cblas_dscal( n, scl, vr(0,i), 1 );
            }
            else if ( WI[i] > 0. ) {
                d__1 = cblas_dnrm2( n, vr(0,i),   1 );
                d__2 = cblas_dnrm2( n, vr(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                cblas_dscal( n, scl, vr(0,i),   1 );
                cblas_dscal( n, scl, vr(0,i+1), 1 );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *vr(k,i);
                    d__2 = *vr(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = cblas_idamax( n, &work[iwrk], 1 );
                lapackf77_dlartg( vr(k,i), vr(k,i+1), &cs, &sn, &r );
                cblas_drot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn );
                *vr(k,i+1) = 0.;
            }
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        i__1 = n - (*info);
        i__2 = max( n - (*info), 1 );
        lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                          WR + (*info), &i__2, &ierr );
        lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                          WI + (*info), &i__2, &ierr );
        if (*info > 0) {
            i__1 = ilo - 1;
            lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                              WR, &n, &ierr );
            lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                              WI, &n, &ierr );
        }
    }

    #if defined(VERSION3)
    magma_free( dT );
    #endif
    
    return *info;
} /* magma_dgeev */
示例#4
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dgehrd
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t    gflops, gpu_perf, gpu_time, cpu_perf, cpu_time;
    double *h_A, *h_R, *h_Q, *h_work, *tau, *twork, *T;
    magmaDouble_ptr dT;
    #ifdef COMPLEX
    double      *rwork;
    #endif
    double      eps, result[2];
    magma_int_t N, n2, lda, nb, lwork, ltwork, info;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;
    
    eps   = lapackf77_dlamch( "E" );
    
    magma_opts opts;
    opts.parse_opts( argc, argv );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    // pass ngpu = -1 to test multi-GPU code using 1 gpu
    magma_int_t abs_ngpu = abs( opts.ngpu );
    
    printf("%% version %d, ngpu = %d\n", int(opts.version), int(abs_ngpu) );
    
    printf("%%   N   CPU Gflop/s (sec)   GPU Gflop/s (sec)   |A-QHQ^H|/N|A|   |I-QQ^H|/N\n");
    printf("%%==========================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[itest];
            lda    = N;
            n2     = lda*N;
            nb     = magma_get_dgehrd_nb( N );
            // magma needs larger workspace than lapack, esp. multi-gpu verison
            lwork  = N*nb;
            if (opts.ngpu != 1) {
                lwork += N*nb*abs_ngpu;
            }
            gflops = FLOPS_DGEHRD( N ) / 1e9;
            
            TESTING_MALLOC_CPU( h_A,    double, n2    );
            TESTING_MALLOC_CPU( tau,    double, N     );
            TESTING_MALLOC_CPU( T,      double, nb*N  );  // for multi GPU
            
            TESTING_MALLOC_PIN( h_R,    double, n2    );
            TESTING_MALLOC_PIN( h_work, double, lwork );
            
            TESTING_MALLOC_DEV( dT,     double, nb*N  );  // for single GPU
            
            /* Initialize the matrices */
            lapackf77_dlarnv( &ione, ISEED, &n2, h_A );
            lapackf77_dlacpy( MagmaFullStr, &N, &N, h_A, &lda, h_R, &lda );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            if ( opts.version == 1 ) {
                if ( opts.ngpu == 1 ) {
                    magma_dgehrd( N, ione, N, h_R, lda, tau, h_work, lwork, dT, &info );
                }
                else {
                    magma_dgehrd_m( N, ione, N, h_R, lda, tau, h_work, lwork, T, &info );
                }
            }
            else {
                // LAPACK-complaint arguments, no dT array
                printf( "magma_dgehrd2\n" );
                magma_dgehrd2( N, ione, N, h_R, lda, tau, h_work, lwork, &info );
            }
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0) {
                printf("magma_dgehrd returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            }
            
            /* =====================================================================
               Check the factorization
               =================================================================== */
            if ( opts.check ) {
                ltwork = 2*N*N;
                TESTING_MALLOC_PIN( h_Q,   double, lda*N  );
                TESTING_MALLOC_CPU( twork, double, ltwork );
                #ifdef COMPLEX
                TESTING_MALLOC_CPU( rwork, double, N );
                #endif
                
                lapackf77_dlacpy( MagmaFullStr, &N, &N, h_R, &lda, h_Q, &lda );
                for( int j = 0; j < N-1; ++j )
                    for( int i = j+2; i < N; ++i )
                        h_R[i+j*lda] = MAGMA_D_ZERO;
                
                if ( opts.version == 1 ) {
                    if ( opts.ngpu != 1 ) {
                        magma_dsetmatrix( nb, N, T, nb, dT, nb, opts.queue );
                    }
                    magma_dorghr( N, ione, N, h_Q, lda, tau, dT, nb, &info );
                }
                else {
                    // for magma_dgehrd2, no dT array
                    lapackf77_dorghr( &N, &ione, &N, h_Q, &lda, tau, h_work, &lwork, &info );
                }
                if (info != 0) {
                    printf("magma_dorghr returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                    return -1;
                }
                lapackf77_dhst01( &N, &ione, &N,
                                  h_A, &lda, h_R, &lda,
                                  h_Q, &lda, twork, &ltwork,
                                  #ifdef COMPLEX
                                  rwork,
                                  #endif
                                  result );
                
                TESTING_FREE_PIN( h_Q   );
                TESTING_FREE_CPU( twork );
                #ifdef COMPLEX
                TESTING_FREE_CPU( rwork );
                #endif
            }
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_dgehrd( &N, &ione, &N, h_A, &lda, tau, h_work, &lwork, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0) {
                    printf("lapackf77_dgehrd returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                }
            }
            
            /* =====================================================================
               Print performance and error.
               =================================================================== */
            if ( opts.lapack ) {
                printf("%5d   %7.2f (%7.2f)   %7.2f (%7.2f)",
                       (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
            }
            else {
                printf("%5d     ---   (  ---  )   %7.2f (%7.2f)",
                       (int) N, gpu_perf, gpu_time );
            }
            if ( opts.check ) {
                bool okay = (result[0]*eps < tol) && (result[1]*eps < tol);
                status += ! okay;
                printf("   %8.2e        %8.2e   %s\n",
                       result[0]*eps, result[1]*eps,
                       (okay ? "ok" : "failed") );
            }
            else {
                printf("     ---             ---\n");
            }
            
            TESTING_FREE_CPU( h_A    );
            TESTING_FREE_CPU( tau    );
            TESTING_FREE_CPU( T      );
            
            TESTING_FREE_PIN( h_R    );
            TESTING_FREE_PIN( h_work );
            
            TESTING_FREE_DEV( dT     );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }
    
    opts.cleanup();
    TESTING_FINALIZE();
    return status;
}
示例#5
0
/**
    Purpose
    -------
    DGEEV computes for an N-by-N real nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**T * A = lambda(j) * u(j)**T
    where u(j)**T denotes the transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    ---------
    @param[in]
    jobvl   magma_vec_t
      -     = MagmaNoVec: left eigenvectors of A are not computed;
      -     = MagmaVec:   left eigenvectors of are computed.

    @param[in]
    jobvr   magma_vec_t
      -     = MagmaNoVec: right eigenvectors of A are not computed;
      -     = MagmaVec:   right eigenvectors of A are computed.

    @param[in]
    n       INTEGER
            The order of the matrix A. N >= 0.

    @param[in,out]
    A       DOUBLE PRECISION array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    wr      DOUBLE PRECISION array, dimension (N)
    @param[out]
    wi      DOUBLE PRECISION array, dimension (N)
            WR and WI contain the real and imaginary parts,
            respectively, of the computed eigenvalues.  Complex
            conjugate pairs of eigenvalues appear consecutively
            with the eigenvalue having the positive imaginary part
            first.

    @param[out]
    VL      DOUBLE PRECISION array, dimension (LDVL,N)
            If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = MagmaNoVec, VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    @param[in]
    ldvl    INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = MagmaVec, LDVL >= N.

    @param[out]
    VR      DOUBLE PRECISION array, dimension (LDVR,N)
            If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = MagmaNoVec, VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    @param[in]
    ldvr    INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = MagmaVec, LDVR >= N.

    @param[out]
    work    (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK[0] returns the optimal LWORK.

    @param[in]
    lwork   INTEGER
            The dimension of the array WORK.  LWORK >= (2+nb)*N.
            For optimal performance, LWORK >= (2+2*nb)*N.
    \n
            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value.
      -     > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.

    @ingroup magma_dgeev_driver
    ********************************************************************/
extern "C" magma_int_t
magma_dgeev_m(
    magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
    double *A, magma_int_t lda,
    double *wr, double *wi,
    double *VL, magma_int_t ldvl,
    double *VR, magma_int_t ldvr,
    double *work, magma_int_t lwork,
    magma_int_t *info )
{
    #define VL(i,j)  (VL + (i) + (j)*ldvl)
    #define VR(i,j)  (VR + (i) + (j)*ldvr)
    
    const magma_int_t ione  = 1;
    const magma_int_t izero = 0;
    
    double d__1, d__2;
    double r, cs, sn, scl;
    double dum[1], eps;
    double anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb;
    magma_int_t scalea, minwrk, optwrk, lquery, wantvl, wantvr, select[1];
    
    magma_side_t side = MagmaRight;
    
    magma_timer_t time_total=0, time_gehrd=0, time_unghr=0, time_hseqr=0, time_trevc=0, time_sum=0;
    magma_flops_t flop_total=0, flop_gehrd=0, flop_unghr=0, flop_hseqr=0, flop_trevc=0, flop_sum=0;
    timer_start( time_total );
    flops_start( flop_total );
    
    *info = 0;
    lquery = (lwork == -1);
    wantvl = (jobvl == MagmaVec);
    wantvr = (jobvr == MagmaVec);
    if (! wantvl && jobvl != MagmaNoVec) {
        *info = -1;
    } else if (! wantvr && jobvr != MagmaNoVec) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -9;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -11;
    }

    /* Compute workspace */
    nb = magma_get_dgehrd_nb( n );
    if (*info == 0) {
        minwrk = (2 +   nb)*n;
        optwrk = (2 + 2*nb)*n;
        work[0] = MAGMA_D_MAKE( (double) optwrk, 0. );
        
        if (lwork < minwrk && ! lquery) {
            *info = -13;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
   
    #if defined(Version3) || defined(Version4) || defined(Version5)
    double *dT;
    if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version4) || defined(Version5)
    double *T;
    if (MAGMA_SUCCESS != magma_dmalloc_cpu( &T, nb*n )) {
        magma_free( dT );
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_dlamch( "P" );
    smlnum = lapackf77_dlamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_dlabad( &smlnum, &bignum );
    smlnum = magma_dsqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_dlascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (Workspace: need N)
     *  - this space is reserved until after gebak */
    ibal = 0;
    lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (Workspace: need 3*N, prefer 2*N + N*NB)
     *  - including N reserved for gebal/gebak, unused by dgehrd */
    itau = ibal + n;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    timer_start( time_gehrd );
    flops_start( flop_gehrd );
    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_dgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored,
        magma_dgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version4) || defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_dgehrd_m( n, ilo, ihi, A, lda,
                        &work[itau], &work[iwrk], liwrk, T, &ierr );
        magma_dsetmatrix( nb, n, T, nb, dT, nb );
    #endif
    time_sum += timer_stop( time_gehrd );
    flop_sum += flops_stop( flop_gehrd );

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side = MagmaLeft;
        lapackf77_dlacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl );

        /* Generate orthogonal matrix in VL
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by dorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_dorghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        /* Perform QR iteration, accumulating Schur vectors in VL
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VL, &ldvl, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side = MagmaBothSides;
            lapackf77_dlacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side = MagmaRight;
        lapackf77_dlacpy( "L", &n, &n, A, &lda, VR, &ldvr );

        /* Generate orthogonal matrix in VR
         * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB)
         *  - including N reserved for gebal/gebak, unused by dorghr */
        timer_start( time_unghr );
        flops_start( flop_unghr );
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_dorghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored
            magma_dorghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_dorghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr );
        #endif
        time_sum += timer_stop( time_unghr );
        flop_sum += flops_stop( flop_unghr );

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }
    else {
        /* Compute eigenvalues only
         * (Workspace: need N+1, prefer N+HSWORK (see comments) )
         *  - including N reserved for gebal/gebak, unused by dhseqr */
        timer_start( time_hseqr );
        flops_start( flop_hseqr );
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, wr, wi,
                          VR, &ldvr, &work[iwrk], &liwrk, info );
        time_sum += timer_stop( time_hseqr );
        flop_sum += flops_stop( flop_hseqr );
    }

    /* If INFO > 0 from DHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    timer_start( time_trevc );
    flops_start( flop_trevc );
    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (Workspace: need 4*N, prefer (2 + 2*nb)*N)
         *  - including N reserved for gebal/gebak, unused by dtrevc */
        liwrk = lwork - iwrk;
        #if TREVC_VERSION == 1
        lapackf77_dtrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                          VR, &ldvr, &n, &nout, &work[iwrk], &ierr );
        #elif TREVC_VERSION == 2
        lapackf77_dtrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl,
                           VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr );
        #elif TREVC_VERSION == 3
        magma_dtrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                       VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 4
        magma_dtrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                          VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #elif TREVC_VERSION == 5
        magma_dtrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl,
                              VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr );
        #else
        #error Unknown TREVC_VERSION
        #endif
    }
    time_sum += timer_stop( time_trevc );
    flop_sum += flops_stop( flop_trevc );

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n,
                          VL, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                scl = 1. / magma_cblas_dnrm2( n, VL(0,i), 1 );
                blasf77_dscal( &n, &scl, VL(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                d__1 = magma_cblas_dnrm2( n, VL(0,i), 1 );
                d__2 = magma_cblas_dnrm2( n, VL(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                blasf77_dscal( &n, &scl, VL(0,i), &ione );
                blasf77_dscal( &n, &scl, VL(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VL(k,i);
                    d__2 = *VL(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                lapackf77_dlartg( VL(k,i), VL(k,i+1), &cs, &sn, &r );
                blasf77_drot( &n, VL(0,i), &ione, VL(0,i+1), &ione, &cs, &sn );
                *VL(k,i+1) = 0.;
            }
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (Workspace: need N) */
        lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n,
                          VR, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            if ( wi[i] == 0. ) {
                scl = 1. / magma_cblas_dnrm2( n, VR(0,i), 1 );
                blasf77_dscal( &n, &scl, VR(0,i), &ione );
            }
            else if ( wi[i] > 0. ) {
                d__1 = magma_cblas_dnrm2( n, VR(0,i), 1 );
                d__2 = magma_cblas_dnrm2( n, VR(0,i+1), 1 );
                scl = 1. / lapackf77_dlapy2( &d__1, &d__2 );
                blasf77_dscal( &n, &scl, VR(0,i), &ione );
                blasf77_dscal( &n, &scl, VR(0,i+1), &ione );
                for (k = 0; k < n; ++k) {
                    /* Computing 2nd power */
                    d__1 = *VR(k,i);
                    d__2 = *VR(k,i+1);
                    work[iwrk + k] = d__1*d__1 + d__2*d__2;
                }
                k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1;  // subtract 1; k is 0-based
                lapackf77_dlartg( VR(k,i), VR(k,i+1), &cs, &sn, &r );
                blasf77_drot( &n, VR(0,i), &ione, VR(0,i+1), &ione, &cs, &sn );
                *VR(k,i+1) = 0.;
            }
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        // converged eigenvalues, stored in wr[i+1:n] and wi[i+1:n] for i = INFO
        magma_int_t nval = n - (*info);
        magma_int_t ld = max( nval, 1 );
        lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr + (*info), &ld, &ierr );
        lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi + (*info), &ld, &ierr );
        if (*info > 0) {
            // first ilo columns were already upper triangular,
            // so the corresponding eigenvalues are also valid.
            nval = ilo - 1;
            lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr, &n, &ierr );
            lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi, &n, &ierr );
        }
    }

    #if defined(Version3) || defined(Version4) || defined(Version5)
    magma_free( dT );
    #endif
    #if defined(Version4) || defined(Version5)
    magma_free_cpu( T );
    #endif
    
    timer_stop( time_total );
    flops_stop( flop_total );
    timer_printf( "dgeev times n %5d, gehrd %7.3f, unghr %7.3f, hseqr %7.3f, trevc %7.3f, total %7.3f, sum %7.3f\n",
                  (int) n, time_gehrd, time_unghr, time_hseqr, time_trevc, time_total, time_sum );
    timer_printf( "dgeev flops n %5d, gehrd %7lld, unghr %7lld, hseqr %7lld, trevc %7lld, total %7lld, sum %7lld\n",
                  (int) n, flop_gehrd, flop_unghr, flop_hseqr, flop_trevc, flop_total, flop_sum );
    
    work[0] = MAGMA_D_MAKE( (double) optwrk, 0. );
    
    return *info;
} /* magma_dgeev */