extern "C" magma_int_t magma_dsyevd( magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n, double *a, magma_int_t lda, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_queue_t queue, magma_int_t *info) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= DSYEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments ========= JOBZ (input) CHARACTER*1 = 'N': Compute eigenvalues only; = 'V': Compute eigenvalues and eigenvectors. UPLO (input) CHARACTER*1 = 'U': Upper triangle of A is stored; = 'L': Lower triangle of A is stored. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE_PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = 'U', the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if JOBZ = 'V', then if INFO = 0, A contains the orthonormal eigenvectors of the matrix A. If JOBZ = 'N', then on exit the lower triangle (if UPLO='L') or the upper triangle (if UPLO='U') of A, including the diagonal, is destroyed. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. WORK (workspace/output) DOUBLE_PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. LWORK (input) INTEGER The length of the array WORK. If N <= 1, LWORK >= 1. If JOBZ = 'N' and N > 1, LWORK >= 2*N + N*NB. If JOBZ = 'V' and N > 1, LWORK >= max( 2*N + N*NB, 1 + 6*N + 2*N**2 ). NB can be obtained through magma_get_dsytrd_nb(N). If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK and IWORK arrays, returns these values as the first entries of the WORK and IWORK arrays, and no error message related to LWORK or LIWORK is issued by XERBLA. IWORK (workspace/output) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK. LIWORK (input) INTEGER The dimension of the array IWORK. If N <= 1, LIWORK >= 1. If JOBZ = 'N' and N > 1, LIWORK >= 1. If JOBZ = 'V' and N > 1, LIWORK >= 3 + 5*N. If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK and IWORK arrays, returns these values as the first entries of the WORK and IWORK arrays, and no error message related to LWORK or LIWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value > 0: if INFO = i and JOBZ = 'N', then the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; if INFO = i and JOBZ = 'V', then the algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details =============== Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified description of INFO. Sven, 16 Feb 05. ===================================================================== */ const char* uplo_ = lapack_uplo_const( uplo ); const char* jobz_ = lapack_vec_const( jobz ); magma_int_t ione = 1; magma_int_t izero = 0; double d_one = 1.; double d__1; double eps; magma_int_t inde; double anrm; double rmin, rmax; double sigma; magma_int_t iinfo, lwmin; magma_int_t lower; magma_int_t wantz; magma_int_t indwk2, llwrk2; magma_int_t iscale; double safmin; double bignum; magma_int_t indtau; magma_int_t indwrk, liwmin; magma_int_t llwork; double smlnum; magma_int_t lquery; magmaDouble_ptr dwork; wantz = (jobz == MagmaVec); lower = (uplo == MagmaLower); lquery = (lwork == -1 || liwork == -1); *info = 0; if (! (wantz || (jobz == MagmaNoVec))) { *info = -1; } else if (! (lower || (uplo == MagmaUpper))) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } magma_int_t nb = magma_get_dsytrd_nb( n ); if ( n <= 1 ) { lwmin = 1; liwmin = 1; } else if ( wantz ) { lwmin = max( 2*n + n*nb, 1 + 6*n + 2*n*n ); liwmin = 3 + 5*n; } else { lwmin = 2*n + n*nb; liwmin = 1; } // multiply by 1+eps to ensure length gets rounded up, // if it cannot be exactly represented in floating point. double one_eps = 1. + lapackf77_dlamch("Epsilon"); work[0] = lwmin * one_eps; iwork[0] = liwmin; if ((lwork < lwmin) && !lquery) { *info = -8; } else if ((liwork < liwmin) && ! lquery) { *info = -10; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } if (n == 1) { w[0] = a[0]; if (wantz) { a[0] = 1.; } return *info; } /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */ if (n <= 128) { #ifdef ENABLE_DEBUG printf("--------------------------------------------------------------\n"); printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb); printf("--------------------------------------------------------------\n"); #endif lapackf77_dsyevd(jobz_, uplo_, &n, a, &lda, w, work, &lwork, iwork, &liwork, info); return *info; } /* Get machine constants. */ safmin = lapackf77_dlamch("Safe minimum"); eps = lapackf77_dlamch("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = magma_dsqrt(smlnum); rmax = magma_dsqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = lapackf77_dlansy("M", uplo_, &n, a, &lda, work ); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { lapackf77_dlascl(uplo_, &izero, &izero, &d_one, &sigma, &n, &n, a, &lda, info); } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ // dsytrd work: e (n) + tau (n) + llwork (n*nb) ==> 2n + n*nb // dstedx work: e (n) + tau (n) + z (n*n) + llwrk2 (1 + 4*n + n^2) ==> 1 + 6n + 2n^2 inde = 0; indtau = inde + n; indwrk = indtau + n; indwk2 = indwrk + n*n; llwork = lwork - indwrk; llwrk2 = lwork - indwk2; magma_timer_t time; timer_start( time ); magma_dsytrd(uplo, n, a, lda, w, &work[inde], &work[indtau], &work[indwrk], llwork, queue, &iinfo); timer_stop( time ); timer_printf( "time dsytrd = %6.2f\n", time ); /* For eigenvalues only, call DSTERF. For eigenvectors, first call DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the tridiagonal matrix, then call DORMTR to multiply it to the Householder transformations represented as Householder vectors in A. */ if (! wantz) { lapackf77_dsterf(&n, w, &work[inde], info); } else { timer_start( time ); if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 3*n*(n/2 + 1) )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } // TTT Possible bug for n < 128 magma_dstedx(MagmaRangeAll, n, 0., 0., 0, 0, w, &work[inde], &work[indwrk], n, &work[indwk2], llwrk2, iwork, liwork, dwork, queue, info); magma_free( dwork ); timer_stop( time ); timer_printf( "time dstedx = %6.2f\n", time ); timer_start( time ); magma_dormtr(MagmaLeft, uplo, MagmaNoTrans, n, n, a, lda, &work[indtau], &work[indwrk], n, &work[indwk2], llwrk2, queue, &iinfo); lapackf77_dlacpy("A", &n, &n, &work[indwrk], &n, a, &lda); timer_stop( time ); timer_printf( "time dormtr + copy = %6.2f\n", time ); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; blasf77_dscal(&n, &d__1, w, &ione); } work[0] = lwmin * one_eps; // round up iwork[0] = liwmin; return *info; } /* magma_dsyevd */
/** Purpose ------- DSYEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments --------- @param[in] nrgpu INTEGER Number of GPUs to use. @param[in] jobz magma_vec_t - = MagmaNoVec: Compute eigenvalues only; - = MagmaVec: Compute eigenvalues and eigenvectors. @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A DOUBLE_PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if JOBZ = MagmaVec, then if INFO = 0, A contains the orthonormal eigenvectors of the matrix A. If JOBZ = MagmaNoVec, then on exit the lower triangle (if UPLO=MagmaLower) or the upper triangle (if UPLO=MagmaUpper) of A, including the diagonal, is destroyed. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] w DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. @param[out] work (workspace) DOUBLE_PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The length of the array WORK. If N <= 1, LWORK >= 1. If JOBZ = MagmaNoVec and N > 1, LWORK >= 2*N + N*NB. If JOBZ = MagmaVec and N > 1, LWORK >= max( 2*N + N*NB, 1 + 6*N + 2*N**2 ). NB can be obtained through magma_get_dsytrd_nb(N). \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] iwork (workspace) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK. @param[in] liwork INTEGER The dimension of the array IWORK. If N <= 1, LIWORK >= 1. If JOBZ = MagmaNoVec and N > 1, LIWORK >= 1. If JOBZ = MagmaVec and N > 1, LIWORK >= 3 + 5*N. \n If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i and JOBZ = MagmaNoVec, then the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; if INFO = i and JOBZ = MagmaVec, then the algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details --------------- Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified description of INFO. Sven, 16 Feb 05. @ingroup magma_dsyev_driver ********************************************************************/ extern "C" magma_int_t magma_dsyevd_m(magma_int_t nrgpu, magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n, double *A, magma_int_t lda, double *w, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magma_int_t *info) { const char* uplo_ = lapack_uplo_const( uplo ); const char* jobz_ = lapack_vec_const( jobz ); magma_int_t ione = 1; magma_int_t izero = 0; double d_one = 1.; double d__1; double eps; magma_int_t inde; double anrm; double rmin, rmax; double sigma; magma_int_t iinfo, lwmin; magma_int_t lower; magma_int_t wantz; magma_int_t indwk2, llwrk2; magma_int_t iscale; double safmin; double bignum; magma_int_t indtau; magma_int_t indwrk, liwmin; magma_int_t llwork; double smlnum; magma_int_t lquery; wantz = (jobz == MagmaVec); lower = (uplo == MagmaLower); lquery = (lwork == -1 || liwork == -1); *info = 0; if (! (wantz || (jobz == MagmaNoVec))) { *info = -1; } else if (! (lower || (uplo == MagmaUpper))) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } magma_int_t nb = magma_get_dsytrd_nb( n ); if ( n <= 1 ) { lwmin = 1; liwmin = 1; } else if ( wantz ) { lwmin = max( 2*n + n*nb, 1 + 6*n + 2*n*n ); liwmin = 3 + 5*n; } else { lwmin = 2*n + n*nb; liwmin = 1; } // multiply by 1+eps (in Double!) to ensure length gets rounded up, // if it cannot be exactly represented in floating point. real_Double_t one_eps = 1. + lapackf77_dlamch("Epsilon"); work[0] = lwmin * one_eps; iwork[0] = liwmin; if ((lwork < lwmin) && !lquery) { *info = -8; } else if ((liwork < liwmin) && ! lquery) { *info = -10; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } if (n == 1) { w[0] = A[0]; if (wantz) { A[0] = 1.; } return *info; } /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */ if (n <= 128) { #ifdef ENABLE_DEBUG printf("--------------------------------------------------------------\n"); printf(" warning matrix too small N=%d NB=%d, calling lapack on CPU \n", (int) n, (int) nb); printf("--------------------------------------------------------------\n"); #endif lapackf77_dsyevd(jobz_, uplo_, &n, A, &lda, w, work, &lwork, iwork, &liwork, info); return *info; } /* Get machine constants. */ safmin = lapackf77_dlamch("Safe minimum"); eps = lapackf77_dlamch("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = magma_dsqrt(smlnum); rmax = magma_dsqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = lapackf77_dlansy("M", uplo_, &n, A, &lda, work); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { lapackf77_dlascl(uplo_, &izero, &izero, &d_one, &sigma, &n, &n, A, &lda, info); } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ // dsytrd work: e (n) + tau (n) + llwork (n*nb) ==> 2n + n*nb // dstedx work: e (n) + tau (n) + z (n*n) + llwrk2 (1 + 4*n + n^2) ==> 1 + 6n + 2n^2 inde = 0; indtau = inde + n; indwrk = indtau + n; indwk2 = indwrk + n*n; llwork = lwork - indwrk; llwrk2 = lwork - indwk2; magma_timer_t time=0; timer_start( time ); magma_dsytrd_mgpu(nrgpu, 1, uplo, n, A, lda, w, &work[inde], &work[indtau], &work[indwrk], llwork, &iinfo); timer_stop( time ); timer_printf( "time dsytrd = %6.2f\n", time ); /* For eigenvalues only, call DSTERF. For eigenvectors, first call DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the tridiagonal matrix, then call DORMTR to multiply it to the Householder transformations represented as Householder vectors in A. */ if (! wantz) { lapackf77_dsterf(&n, w, &work[inde], info); } else { timer_start( time ); #ifdef USE_SINGLE_GPU if (MAGMA_SUCCESS != magma_dmalloc( &dwork, 3*n*(n/2 + 1) )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } magma_dstedx(MagmaRangeAll, n, 0., 0., 0, 0, w, &work[inde], &work[indwrk], n, &work[indwk2], llwrk2, iwork, liwork, dwork, info); magma_free( dwork ); #else magma_dstedx_m(nrgpu, MagmaRangeAll, n, 0., 0., 0, 0, w, &work[inde], &work[indwrk], n, &work[indwk2], llwrk2, iwork, liwork, info); #endif timer_stop( time ); timer_printf( "time dstedc = %6.2f\n", time ); timer_start( time ); magma_dormtr_m(nrgpu, MagmaLeft, uplo, MagmaNoTrans, n, n, A, lda, &work[indtau], &work[indwrk], n, &work[indwk2], llwrk2, &iinfo); lapackf77_dlacpy("A", &n, &n, &work[indwrk], &n, A, &lda); timer_stop( time ); timer_printf( "time dormtr + copy = %6.2f\n", time ); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; blasf77_dscal(&n, &d__1, w, &ione); } work[0] = lwmin * one_eps; // round up iwork[0] = liwmin; return *info; } /* magma_dsyevd_m */
extern "C" magma_int_t magma_dgeev( magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n, double *a, magma_int_t lda, double *WR, double *WI, double *vl, magma_int_t ldvl, double *vr, magma_int_t ldvr, double *work, magma_int_t lwork, magma_queue_t queue, magma_int_t *info) { /* -- clMAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver @date November 2014 Purpose ======= DGEEV computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**T * A = lambda(j) * u(j)**T where u(j)**T denotes the transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). WR (output) DOUBLE PRECISION array, dimension (N) WI (output) DOUBLE PRECISION array, dimension (N) WR and WI contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. VL (output) DOUBLE PRECISION array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) DOUBLE PRECISION array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (1+nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ magma_int_t ione = 1; magma_int_t c__1 = 1; magma_int_t c__0 = 0; magma_int_t c_n1 = -1; magma_int_t a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; double d__1, d__2; magma_int_t i__, k, ihi, ilo; double r__, cs, sn, scl; double dum[1], eps; magma_int_t ibal; double anrm; magma_int_t ierr, itau, iwrk, nout; magma_int_t scalea; double cscale; double bignum; magma_int_t minwrk; magma_int_t wantvl; double smlnum; magma_int_t lquery, wantvr, select[1]; magma_int_t nb = 0; magmaDouble_ptr dT; //magma_timestr_t start, end; const char* side_ = NULL; *info = 0; lquery = lwork == -1; wantvl = (jobvl == MagmaVec); wantvr = (jobvr == MagmaVec); if (! wantvl && jobvl != MagmaNoVec) { *info = -1; } else if (! wantvr && jobvr != MagmaNoVec) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -9; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -11; } /* Compute workspace */ if (*info == 0) { nb = magma_get_dgehrd_nb(n); minwrk = (2+nb)*n; work[0] = (double) minwrk; if (lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } // if eigenvectors are needed #if defined(VERSION3) if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif // subtract row and col for 1-based indexing a_dim1 = lda; a_offset = 1 + a_dim1; a -= a_offset; vl_dim1 = ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; /* Get machine constants */ eps = lapackf77_dlamch("P"); smlnum = lapackf77_dlamch("S"); bignum = 1. / smlnum; lapackf77_dlabad(&smlnum, &bignum); smlnum = magma_dsqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_dlange("M", &n, &n, &a[a_offset], &lda, dum); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_dlascl("G", &c__0, &c__0, &anrm, &cscale, &n, &n, &a[a_offset], &lda, &ierr); } /* Balance the matrix (Workspace: need N) */ ibal = 1; lapackf77_dgebal("B", &n, &a[a_offset], &lda, &ilo, &ihi, &work[ibal], &ierr); /* Reduce to upper Hessenberg form (Workspace: need 3*N, prefer 2*N+N*NB) */ itau = ibal + n; iwrk = itau + n; i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) /* * Version 1 - LAPACK */ lapackf77_dgehrd(&n, &ilo, &ihi, &a[a_offset], &lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION2) /* * Version 2 - LAPACK consistent HRD */ magma_dgehrd2(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, */ magma_dgehrd(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], i__1, dT, 0, queue, &ierr); #endif //end = get_current_time(); //printf(" Time for dgehrd = %5.2f sec\n", GetTimerValue(start,end)/1000.); if (wantvl) { /* Want left eigenvectors Copy Householder vectors to VL */ side_ = "Left"; lapackf77_dlacpy(MagmaLowerStr, &n, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl); /* * Generate orthogonal matrix in VL * (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_dorghr(&n, &ilo, &ihi, &vl[vl_offset], &ldvl, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_dorghr(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], dT, 0, nb, queue, &ierr); #endif //end = get_current_time(); //printf(" Time for dorghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* * Perform QR iteration, accumulating Schur vectors in VL * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_dhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI, &vl[vl_offset], &ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors Copy Schur vectors to VR */ side_ = "Both"; lapackf77_dlacpy("F", &n, &n, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr); } } else if (wantvr) { /* Want right eigenvectors Copy Householder vectors to VR */ side_ = "Right"; lapackf77_dlacpy("L", &n, &n, &a[a_offset], &lda, &vr[vr_offset], &ldvr); /* * Generate orthogonal matrix in VR * (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_dorghr(&n, &ilo, &ihi, &vr[vr_offset], &ldvr, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_dorghr(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], dT, 0, nb, queue, &ierr); #endif //end = get_current_time(); //printf(" Time for dorghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* * Perform QR iteration, accumulating Schur vectors in VR * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_dhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } else { /* * Compute eigenvalues only * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_dhseqr("E", "N", &n, &ilo, &ihi, &a[a_offset], &lda, WR, WI, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { fprintf(stderr, "DHSEQR returned with info = %d\n", (int) *info); goto L50; } if (wantvl || wantvr) { /* * Compute left and/or right eigenvectors * (Workspace: need 4*N) */ lapackf77_dtrevc(side_, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &ierr); } if (wantvl) { /* * Undo balancing of left eigenvectors * (Workspace: need N) */ lapackf77_dgebak("B", "L", &n, &ilo, &ihi, &work[ibal], &n, &vl[vl_offset], &ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { if ( WI[i__-1] == 0.) { scl = magma_cblas_dnrm2(n, &vl[i__ * vl_dim1 + 1], 1); scl = 1. / scl; blasf77_dscal( &n, &scl, &vl[i__ * vl_dim1 + 1], &ione ); } else if (WI[i__-1] > 0.) { d__1 = magma_cblas_dnrm2(n, &vl[ i__ * vl_dim1 + 1], 1); d__2 = magma_cblas_dnrm2(n, &vl[(i__ + 1) * vl_dim1 + 1], 1); scl = lapackf77_dlapy2(&d__1, &d__2); scl = 1. / scl; blasf77_dscal( &n, &scl, &vl[ i__ * vl_dim1 + 1], &ione ); blasf77_dscal( &n, &scl, &vl[(i__ + 1) * vl_dim1 + 1], &ione ); i__2 = n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vl[k + i__ * vl_dim1]; /* Computing 2nd power */ d__2 = vl[k + (i__ + 1) * vl_dim1]; work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = blasf77_idamax( &n, &work[iwrk], &ione ); //+1; lapackf77_dlartg(&vl[k + i__ * vl_dim1], &vl[k + (i__ + 1) * vl_dim1], &cs, &sn, &r__); blasf77_drot( &n, &vl[ i__ * vl_dim1 + 1], &ione, &vl[(i__ + 1) * vl_dim1 + 1], &ione, &cs, &sn ); vl[k + (i__ + 1) * vl_dim1] = 0.; } } } if (wantvr) { /* * Undo balancing of right eigenvectors * (Workspace: need N) */ lapackf77_dgebak("B", "R", &n, &ilo, &ihi, &work[ibal], &n, &vr[vr_offset], &ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { if (WI[i__-1] == 0.) { scl = 1. / magma_cblas_dnrm2(n, &vr[i__ * vr_dim1 + 1], 1); blasf77_dscal( &n, &scl, &vr[i__ * vr_dim1 + 1], &ione ); } else if (WI[i__-1] > 0.) { d__1 = magma_cblas_dnrm2(n, &vr[ i__ * vr_dim1 + 1], 1); d__2 = magma_cblas_dnrm2(n, &vr[(i__ + 1) * vr_dim1 + 1], 1); scl = lapackf77_dlapy2(&d__1, &d__2); scl = 1. / scl; blasf77_dscal( &n, &scl, &vr[ i__ * vr_dim1 + 1], &ione ); blasf77_dscal( &n, &scl, &vr[(i__ + 1) * vr_dim1 + 1], &ione ); i__2 = n; for (k = 1; k <= i__2; ++k) { /* Computing 2nd power */ d__1 = vr[k + i__ * vr_dim1]; /* Computing 2nd power */ d__2 = vr[k + (i__ + 1) * vr_dim1]; work[iwrk + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = blasf77_idamax( &n, &work[iwrk], &ione ); //+1; lapackf77_dlartg(&vr[k + i__ * vr_dim1], &vr[k + (i__ + 1) * vr_dim1], &cs, &sn, &r__); blasf77_drot( &n, &vr[ i__ * vr_dim1 + 1], &ione, &vr[(i__ + 1) * vr_dim1 + 1], &ione, &cs, &sn ); vr[k + (i__ + 1) * vr_dim1] = 0.; } } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = n - *info; /* Computing MAX */ i__3 = n - *info; i__2 = max(i__3,1); lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, WR + (*info), &i__2, &ierr); i__1 = n - *info; /* Computing MAX */ i__3 = n - *info; i__2 = max(i__3,1); lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, WI + (*info), &i__2, &ierr); if (*info > 0) { i__1 = ilo - 1; lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, WR, &n, &ierr); i__1 = ilo - 1; lapackf77_dlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, WI, &n, &ierr); } } #if defined(VERSION3) magma_free( dT ); #endif return *info; } /* magma_dgeev */
extern "C" magma_int_t magma_dgeev( char jobvl, char jobvr, magma_int_t n, double *A, magma_int_t lda, double *WR, double *WI, double *vl, magma_int_t ldvl, double *vr, magma_int_t ldvr, double *work, magma_int_t lwork, magma_int_t *info ) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= DGEEV computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**T * A = lambda(j) * u(j)**T where u(j)**T denotes the transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). WR (output) DOUBLE PRECISION array, dimension (N) WI (output) DOUBLE PRECISION array, dimension (N) WR and WI contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. VL (output) DOUBLE PRECISION array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) DOUBLE PRECISION array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (2+nb)*N. For optimal performance, LWORK >= (2+2*nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ #define vl(i,j) (vl + (i) + (j)*ldvl) #define vr(i,j) (vr + (i) + (j)*ldvr) magma_int_t c_one = 1; magma_int_t c_zero = 0; double d__1, d__2; double r, cs, sn, scl; double dum[1], eps; double anrm, cscale, bignum, smlnum; magma_int_t i, k, ilo, ihi; magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb; magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1]; char side[2] = {0, 0}; char jobvl_[2] = {jobvl, 0}; char jobvr_[2] = {jobvr, 0}; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame( jobvl_, "V" ); wantvr = lapackf77_lsame( jobvr_, "V" ); if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) { *info = -1; } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -9; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -11; } /* Compute workspace */ nb = magma_get_dgehrd_nb( n ); if (*info == 0) { minwrk = (2+nb)*n; work[0] = MAGMA_D_MAKE( (double) minwrk, 0. ); if (lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } #if defined(VERSION3) double *dT; if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif /* Get machine constants */ eps = lapackf77_dlamch( "P" ); smlnum = lapackf77_dlamch( "S" ); bignum = 1. / smlnum; lapackf77_dlabad( &smlnum, &bignum ); smlnum = magma_dsqrt( smlnum ) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum ); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_dlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr ); } /* Balance the matrix * (Workspace: need N) */ ibal = 0; lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr ); /* Reduce to upper Hessenberg form * (Workspace: need 3*N, prefer 2*N + N*NB) */ itau = ibal + n; iwrk = itau + n; liwrk = lwork - iwrk; #if defined(VERSION1) // Version 1 - LAPACK lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION2) // Version 2 - LAPACK consistent HRD magma_dgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored, magma_dgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr ); #endif if (wantvl) { /* Want left eigenvectors * Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_dlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl ); /* Generate orthogonal matrix in VL * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */ #if defined(VERSION1) || defined(VERSION2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VL * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI, vl, &ldvl, &work[iwrk], &liwrk, info ); if (wantvr) { /* Want left and right eigenvectors * Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_dlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr ); } } else if (wantvr) { /* Want right eigenvectors * Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_dlacpy( "L", &n, &n, A, &lda, vr, &ldvr ); /* Generate orthogonal matrix in VR * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */ #if defined(VERSION1) || defined(VERSION2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VR * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI, vr, &ldvr, &work[iwrk], &liwrk, info ); } else { /* Compute eigenvalues only * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI, vr, &ldvr, &work[iwrk], &liwrk, info ); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto CLEANUP; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors * (Workspace: need 4*N) */ liwrk = lwork - iwrk; #if TREVC_VERSION == 1 lapackf77_dtrevc( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &ierr ); #elif TREVC_VERSION == 2 lapackf77_dtrevc3( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr ); #endif } if (wantvl) { /* Undo balancing of left eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n, vl, &ldvl, &ierr ); /* Normalize left eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( WI[i] == 0. ) { scl = 1. / cblas_dnrm2( n, vl(0,i), 1 ); cblas_dscal( n, scl, vl(0,i), 1 ); } else if ( WI[i] > 0. ) { d__1 = cblas_dnrm2( n, vl(0,i), 1 ); d__2 = cblas_dnrm2( n, vl(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); cblas_dscal( n, scl, vl(0,i), 1 ); cblas_dscal( n, scl, vl(0,i+1), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *vl(k,i); d__2 = *vl(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &work[iwrk], 1 ); lapackf77_dlartg( vl(k,i), vl(k,i+1), &cs, &sn, &r ); cblas_drot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn ); *vl(k,i+1) = 0.; } } } if (wantvr) { /* Undo balancing of right eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n, vr, &ldvr, &ierr ); /* Normalize right eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( WI[i] == 0. ) { scl = 1. / cblas_dnrm2( n, vr(0,i), 1 ); cblas_dscal( n, scl, vr(0,i), 1 ); } else if ( WI[i] > 0. ) { d__1 = cblas_dnrm2( n, vr(0,i), 1 ); d__2 = cblas_dnrm2( n, vr(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); cblas_dscal( n, scl, vr(0,i), 1 ); cblas_dscal( n, scl, vr(0,i+1), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *vr(k,i); d__2 = *vr(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &work[iwrk], 1 ); lapackf77_dlartg( vr(k,i), vr(k,i+1), &cs, &sn, &r ); cblas_drot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn ); *vr(k,i+1) = 0.; } } } CLEANUP: /* Undo scaling if necessary */ if (scalea) { i__1 = n - (*info); i__2 = max( n - (*info), 1 ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WR + (*info), &i__2, &ierr ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WI + (*info), &i__2, &ierr ); if (*info > 0) { i__1 = ilo - 1; lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WR, &n, &ierr ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WI, &n, &ierr ); } } #if defined(VERSION3) magma_free( dT ); #endif return *info; } /* magma_dgeev */
/** Purpose ------- DSTEDX computes some eigenvalues and, optionally, eigenvectors of a symmetric tridiagonal matrix using the divide and conquer method. This code makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. See DLAEX3 for details. Arguments --------- @param[in] range magma_range_t - = MagmaRangeAll: all eigenvalues will be found. - = MagmaRangeV: all eigenvalues in the half-open interval (VL,VU] will be found. - = MagmaRangeI: the IL-th through IU-th eigenvalues will be found. @param[in] n INTEGER The dimension of the symmetric tridiagonal matrix. N >= 0. @param[in] vl DOUBLE PRECISION @param[in] vu DOUBLE PRECISION If RANGE=MagmaRangeV, the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = MagmaRangeAll or MagmaRangeI. @param[in] il INTEGER @param[in] iu INTEGER If RANGE=MagmaRangeI, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = MagmaRangeAll or MagmaRangeV. @param[in,out] d DOUBLE PRECISION array, dimension (N) On entry, the diagonal elements of the tridiagonal matrix. On exit, if INFO = 0, the eigenvalues in ascending order. @param[in,out] e DOUBLE PRECISION array, dimension (N-1) On entry, the subdiagonal elements of the tridiagonal matrix. On exit, E has been destroyed. @param[in,out] Z DOUBLE PRECISION array, dimension (LDZ,N) On exit, if INFO = 0, Z contains the orthonormal eigenvectors of the symmetric tridiagonal matrix. @param[in] ldz INTEGER The leading dimension of the array Z. LDZ >= max(1,N). @param[out] work (workspace) DOUBLE PRECISION array, dimension (LWORK) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The dimension of the array WORK. If N > 1 then LWORK >= ( 1 + 4*N + N**2 ). Note that if N is less than or equal to the minimum divide size, usually 25, then LWORK need only be max(1,2*(N-1)). \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. @param[out] iwork (workspace) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK. @param[in] liwork INTEGER The dimension of the array IWORK. LIWORK >= ( 3 + 5*N ). Note that if N is less than or equal to the minimum divide size, usually 25, then LIWORK need only be 1. \n If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the IWORK array, returns this value as the first entry of the IWORK array, and no error message related to LIWORK is issued by XERBLA. @param dwork (workspace) DOUBLE PRECISION array, dimension (3*N*N/2+3*N) @param[out] info INTEGER - = 0: successful exit. - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: The algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details --------------- Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified by Francoise Tisseur, University of Tennessee. @ingroup magma_dsyev_comp ********************************************************************/ extern "C" magma_int_t magma_dstedx( magma_range_t range, magma_int_t n, double vl, double vu, magma_int_t il, magma_int_t iu, double *d, double *e, double *Z, magma_int_t ldz, double *work, magma_int_t lwork, magma_int_t *iwork, magma_int_t liwork, magmaDouble_ptr dwork, magma_int_t *info) { #define Z(i_,j_) (Z + (i_) + (j_)*ldz) double d_zero = 0.; double d_one = 1.; magma_int_t izero = 0; magma_int_t ione = 1; magma_int_t alleig, indeig, valeig, lquery; magma_int_t i, j, k, m; magma_int_t liwmin, lwmin; magma_int_t start, end, smlsiz; double eps, orgnrm, p, tiny; // Test the input parameters. alleig = (range == MagmaRangeAll); valeig = (range == MagmaRangeV); indeig = (range == MagmaRangeI); lquery = (lwork == -1 || liwork == -1); *info = 0; if (! (alleig || valeig || indeig)) { *info = -1; } else if (n < 0) { *info = -2; } else if (ldz < max(1,n)) { *info = -10; } else { if (valeig) { if (n > 0 && vu <= vl) { *info = -4; } } else if (indeig) { if (il < 1 || il > max(1,n)) { *info = -5; } else if (iu < min(n,il) || iu > n) { *info = -6; } } } if (*info == 0) { // Compute the workspace requirements smlsiz = magma_get_smlsize_divideconquer(); if ( n <= 1 ) { lwmin = 1; liwmin = 1; } else { lwmin = 1 + 4*n + n*n; liwmin = 3 + 5*n; } work[0] = magma_dmake_lwork( lwmin ); iwork[0] = liwmin; if (lwork < lwmin && ! lquery) { *info = -12; } else if (liwork < liwmin && ! lquery) { *info = -14; } } if (*info != 0) { magma_xerbla( __func__, -(*info)); return *info; } else if (lquery) { return *info; } // Quick return if possible if (n == 0) return *info; if (n == 1) { *Z = 1.; return *info; } /* determine the number of threads *///not needed here to be checked Azzam //magma_int_t threads = magma_get_parallel_numthreads(); //magma_int_t mklth = magma_get_lapack_numthreads(); //magma_set_lapack_numthreads(mklth); #ifdef ENABLE_DEBUG //printf(" D&C is using %d threads\n", threads); #endif // If N is smaller than the minimum divide size (SMLSIZ+1), then // solve the problem with another solver. if (n < smlsiz) { lapackf77_dsteqr("I", &n, d, e, Z, &ldz, work, info); } else { lapackf77_dlaset("F", &n, &n, &d_zero, &d_one, Z, &ldz); //Scale. orgnrm = lapackf77_dlanst("M", &n, d, e); if (orgnrm == 0) { work[0] = magma_dmake_lwork( lwmin ); iwork[0] = liwmin; return *info; } eps = lapackf77_dlamch( "Epsilon" ); if (alleig) { start = 0; while ( start < n ) { // Let FINISH be the position of the next subdiagonal entry // such that E( END ) <= TINY or FINISH = N if no such // subdiagonal exists. The matrix identified by the elements // between START and END constitutes an independent // sub-problem. for (end = start+1; end < n; ++end) { tiny = eps * sqrt( MAGMA_D_ABS(d[end-1]*d[end])); if (MAGMA_D_ABS(e[end-1]) <= tiny) break; } // (Sub) Problem determined. Compute its size and solve it. m = end - start; if (m == 1) { start = end; continue; } if (m > smlsiz) { // Scale orgnrm = lapackf77_dlanst("M", &m, &d[start], &e[start]); lapackf77_dlascl("G", &izero, &izero, &orgnrm, &d_one, &m, &ione, &d[start], &m, info); magma_int_t mm = m-1; lapackf77_dlascl("G", &izero, &izero, &orgnrm, &d_one, &mm, &ione, &e[start], &mm, info); magma_dlaex0( m, &d[start], &e[start], Z(start, start), ldz, work, iwork, dwork, MagmaRangeAll, vl, vu, il, iu, info); if ( *info != 0) { return *info; } // Scale Back lapackf77_dlascl("G", &izero, &izero, &d_one, &orgnrm, &m, &ione, &d[start], &m, info); } else { lapackf77_dsteqr( "I", &m, &d[start], &e[start], Z(start, start), &ldz, work, info); if (*info != 0) { *info = (start+1) *(n+1) + end; } } start = end; } // If the problem split any number of times, then the eigenvalues // will not be properly ordered. Here we permute the eigenvalues // (and the associated eigenvectors) into ascending order. if (m < n) { // Use Selection Sort to minimize swaps of eigenvectors for (i = 1; i < n; ++i) { k = i-1; p = d[i-1]; for (j = i; j < n; ++j) { if (d[j] < p) { k = j; p = d[j]; } } if (k != i-1) { d[k] = d[i-1]; d[i-1] = p; blasf77_dswap(&n, Z(0,i-1), &ione, Z(0,k), &ione); } } } } else { // Scale lapackf77_dlascl("G", &izero, &izero, &orgnrm, &d_one, &n, &ione, d, &n, info); magma_int_t nm = n-1; lapackf77_dlascl("G", &izero, &izero, &orgnrm, &d_one, &nm, &ione, e, &nm, info); magma_dlaex0( n, d, e, Z, ldz, work, iwork, dwork, range, vl, vu, il, iu, info); if ( *info != 0) { return *info; } // Scale Back lapackf77_dlascl("G", &izero, &izero, &d_one, &orgnrm, &n, &ione, d, &n, info); } } work[0] = magma_dmake_lwork( lwmin ); iwork[0] = liwmin; return *info; } /* magma_dstedx */
/** Purpose ------- DGEEV computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**T * A = lambda(j) * u(j)**T where u(j)**T denotes the transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments --------- @param[in] jobvl magma_vec_t - = MagmaNoVec: left eigenvectors of A are not computed; - = MagmaVec: left eigenvectors of are computed. @param[in] jobvr magma_vec_t - = MagmaNoVec: right eigenvectors of A are not computed; - = MagmaVec: right eigenvectors of A are computed. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[out] wr DOUBLE PRECISION array, dimension (N) @param[out] wi DOUBLE PRECISION array, dimension (N) WR and WI contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. @param[out] VL DOUBLE PRECISION array, dimension (LDVL,N) If JOBVL = MagmaVec, the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = MagmaNoVec, VL is not referenced. u(j) = VL(:,j), the j-th column of VL. @param[in] ldvl INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = MagmaVec, LDVL >= N. @param[out] VR DOUBLE PRECISION array, dimension (LDVR,N) If JOBVR = MagmaVec, the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = MagmaNoVec, VR is not referenced. v(j) = VR(:,j), the j-th column of VR. @param[in] ldvr INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = MagmaVec, LDVR >= N. @param[out] work (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The dimension of the array WORK. LWORK >= (2+nb)*N. For optimal performance, LWORK >= (2+2*nb)*N. \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value. - > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. @ingroup magma_dgeev_driver ********************************************************************/ extern "C" magma_int_t magma_dgeev_m( magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n, double *A, magma_int_t lda, double *wr, double *wi, double *VL, magma_int_t ldvl, double *VR, magma_int_t ldvr, double *work, magma_int_t lwork, magma_int_t *info ) { #define VL(i,j) (VL + (i) + (j)*ldvl) #define VR(i,j) (VR + (i) + (j)*ldvr) const magma_int_t ione = 1; const magma_int_t izero = 0; double d__1, d__2; double r, cs, sn, scl; double dum[1], eps; double anrm, cscale, bignum, smlnum; magma_int_t i, k, ilo, ihi; magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, nb; magma_int_t scalea, minwrk, optwrk, lquery, wantvl, wantvr, select[1]; magma_side_t side = MagmaRight; magma_timer_t time_total=0, time_gehrd=0, time_unghr=0, time_hseqr=0, time_trevc=0, time_sum=0; magma_flops_t flop_total=0, flop_gehrd=0, flop_unghr=0, flop_hseqr=0, flop_trevc=0, flop_sum=0; timer_start( time_total ); flops_start( flop_total ); *info = 0; lquery = (lwork == -1); wantvl = (jobvl == MagmaVec); wantvr = (jobvr == MagmaVec); if (! wantvl && jobvl != MagmaNoVec) { *info = -1; } else if (! wantvr && jobvr != MagmaNoVec) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -9; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -11; } /* Compute workspace */ nb = magma_get_dgehrd_nb( n ); if (*info == 0) { minwrk = (2 + nb)*n; optwrk = (2 + 2*nb)*n; work[0] = MAGMA_D_MAKE( (double) optwrk, 0. ); if (lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } #if defined(Version3) || defined(Version4) || defined(Version5) double *dT; if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif #if defined(Version4) || defined(Version5) double *T; if (MAGMA_SUCCESS != magma_dmalloc_cpu( &T, nb*n )) { magma_free( dT ); *info = MAGMA_ERR_HOST_ALLOC; return *info; } #endif /* Get machine constants */ eps = lapackf77_dlamch( "P" ); smlnum = lapackf77_dlamch( "S" ); bignum = 1. / smlnum; lapackf77_dlabad( &smlnum, &bignum ); smlnum = magma_dsqrt( smlnum ) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum ); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_dlascl( "G", &izero, &izero, &anrm, &cscale, &n, &n, A, &lda, &ierr ); } /* Balance the matrix * (Workspace: need N) * - this space is reserved until after gebak */ ibal = 0; lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr ); /* Reduce to upper Hessenberg form * (Workspace: need 3*N, prefer 2*N + N*NB) * - including N reserved for gebal/gebak, unused by dgehrd */ itau = ibal + n; iwrk = itau + n; liwrk = lwork - iwrk; timer_start( time_gehrd ); flops_start( flop_gehrd ); #if defined(Version1) // Version 1 - LAPACK lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version2) // Version 2 - LAPACK consistent HRD magma_dgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, &ierr ); #elif defined(Version3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored, magma_dgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr ); #elif defined(Version4) || defined(Version5) // Version 4 - Multi-GPU, T on host magma_dgehrd_m( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, T, &ierr ); magma_dsetmatrix( nb, n, T, nb, dT, nb ); #endif time_sum += timer_stop( time_gehrd ); flop_sum += flops_stop( flop_gehrd ); if (wantvl) { /* Want left eigenvectors * Copy Householder vectors to VL */ side = MagmaLeft; lapackf77_dlacpy( MagmaLowerStr, &n, &n, A, &lda, VL, &ldvl ); /* Generate orthogonal matrix in VL * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) * - including N reserved for gebal/gebak, unused by dorghr */ timer_start( time_unghr ); flops_start( flop_unghr ); #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, VL, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, VL, ldvl, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_dorghr_m( n, ilo, ihi, VL, ldvl, &work[itau], T, nb, &ierr ); #endif time_sum += timer_stop( time_unghr ); flop_sum += flops_stop( flop_unghr ); timer_start( time_hseqr ); flops_start( flop_hseqr ); /* Perform QR iteration, accumulating Schur vectors in VL * (Workspace: need N+1, prefer N+HSWORK (see comments) ) * - including N reserved for gebal/gebak, unused by dhseqr */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi, VL, &ldvl, &work[iwrk], &liwrk, info ); time_sum += timer_stop( time_hseqr ); flop_sum += flops_stop( flop_hseqr ); if (wantvr) { /* Want left and right eigenvectors * Copy Schur vectors to VR */ side = MagmaBothSides; lapackf77_dlacpy( "F", &n, &n, VL, &ldvl, VR, &ldvr ); } } else if (wantvr) { /* Want right eigenvectors * Copy Householder vectors to VR */ side = MagmaRight; lapackf77_dlacpy( "L", &n, &n, A, &lda, VR, &ldvr ); /* Generate orthogonal matrix in VR * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) * - including N reserved for gebal/gebak, unused by dorghr */ timer_start( time_unghr ); flops_start( flop_unghr ); #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, VR, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, VR, ldvr, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_dorghr_m( n, ilo, ihi, VR, ldvr, &work[itau], T, nb, &ierr ); #endif time_sum += timer_stop( time_unghr ); flop_sum += flops_stop( flop_unghr ); /* Perform QR iteration, accumulating Schur vectors in VR * (Workspace: need N+1, prefer N+HSWORK (see comments) ) * - including N reserved for gebal/gebak, unused by dhseqr */ timer_start( time_hseqr ); flops_start( flop_hseqr ); iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, wr, wi, VR, &ldvr, &work[iwrk], &liwrk, info ); time_sum += timer_stop( time_hseqr ); flop_sum += flops_stop( flop_hseqr ); } else { /* Compute eigenvalues only * (Workspace: need N+1, prefer N+HSWORK (see comments) ) * - including N reserved for gebal/gebak, unused by dhseqr */ timer_start( time_hseqr ); flops_start( flop_hseqr ); iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, wr, wi, VR, &ldvr, &work[iwrk], &liwrk, info ); time_sum += timer_stop( time_hseqr ); flop_sum += flops_stop( flop_hseqr ); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto CLEANUP; } timer_start( time_trevc ); flops_start( flop_trevc ); if (wantvl || wantvr) { /* Compute left and/or right eigenvectors * (Workspace: need 4*N, prefer (2 + 2*nb)*N) * - including N reserved for gebal/gebak, unused by dtrevc */ liwrk = lwork - iwrk; #if TREVC_VERSION == 1 lapackf77_dtrevc( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl, VR, &ldvr, &n, &nout, &work[iwrk], &ierr ); #elif TREVC_VERSION == 2 lapackf77_dtrevc3( lapack_side_const(side), "B", select, &n, A, &lda, VL, &ldvl, VR, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr ); #elif TREVC_VERSION == 3 magma_dtrevc3( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr ); #elif TREVC_VERSION == 4 magma_dtrevc3_mt( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr ); #elif TREVC_VERSION == 5 magma_dtrevc3_mt_gpu( side, MagmaBacktransVec, select, n, A, lda, VL, ldvl, VR, ldvr, n, &nout, &work[iwrk], liwrk, &ierr ); #else #error Unknown TREVC_VERSION #endif } time_sum += timer_stop( time_trevc ); flop_sum += flops_stop( flop_trevc ); if (wantvl) { /* Undo balancing of left eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n, VL, &ldvl, &ierr ); /* Normalize left eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( wi[i] == 0. ) { scl = 1. / magma_cblas_dnrm2( n, VL(0,i), 1 ); blasf77_dscal( &n, &scl, VL(0,i), &ione ); } else if ( wi[i] > 0. ) { d__1 = magma_cblas_dnrm2( n, VL(0,i), 1 ); d__2 = magma_cblas_dnrm2( n, VL(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); blasf77_dscal( &n, &scl, VL(0,i), &ione ); blasf77_dscal( &n, &scl, VL(0,i+1), &ione ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *VL(k,i); d__2 = *VL(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1; // subtract 1; k is 0-based lapackf77_dlartg( VL(k,i), VL(k,i+1), &cs, &sn, &r ); blasf77_drot( &n, VL(0,i), &ione, VL(0,i+1), &ione, &cs, &sn ); *VL(k,i+1) = 0.; } } } if (wantvr) { /* Undo balancing of right eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n, VR, &ldvr, &ierr ); /* Normalize right eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( wi[i] == 0. ) { scl = 1. / magma_cblas_dnrm2( n, VR(0,i), 1 ); blasf77_dscal( &n, &scl, VR(0,i), &ione ); } else if ( wi[i] > 0. ) { d__1 = magma_cblas_dnrm2( n, VR(0,i), 1 ); d__2 = magma_cblas_dnrm2( n, VR(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); blasf77_dscal( &n, &scl, VR(0,i), &ione ); blasf77_dscal( &n, &scl, VR(0,i+1), &ione ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *VR(k,i); d__2 = *VR(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = blasf77_idamax( &n, &work[iwrk], &ione ) - 1; // subtract 1; k is 0-based lapackf77_dlartg( VR(k,i), VR(k,i+1), &cs, &sn, &r ); blasf77_drot( &n, VR(0,i), &ione, VR(0,i+1), &ione, &cs, &sn ); *VR(k,i+1) = 0.; } } } CLEANUP: /* Undo scaling if necessary */ if (scalea) { // converged eigenvalues, stored in wr[i+1:n] and wi[i+1:n] for i = INFO magma_int_t nval = n - (*info); magma_int_t ld = max( nval, 1 ); lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr + (*info), &ld, &ierr ); lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi + (*info), &ld, &ierr ); if (*info > 0) { // first ilo columns were already upper triangular, // so the corresponding eigenvalues are also valid. nval = ilo - 1; lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wr, &n, &ierr ); lapackf77_dlascl( "G", &izero, &izero, &cscale, &anrm, &nval, &ione, wi, &n, &ierr ); } } #if defined(Version3) || defined(Version4) || defined(Version5) magma_free( dT ); #endif #if defined(Version4) || defined(Version5) magma_free_cpu( T ); #endif timer_stop( time_total ); flops_stop( flop_total ); timer_printf( "dgeev times n %5d, gehrd %7.3f, unghr %7.3f, hseqr %7.3f, trevc %7.3f, total %7.3f, sum %7.3f\n", (int) n, time_gehrd, time_unghr, time_hseqr, time_trevc, time_total, time_sum ); timer_printf( "dgeev flops n %5d, gehrd %7lld, unghr %7lld, hseqr %7lld, trevc %7lld, total %7lld, sum %7lld\n", (int) n, flop_gehrd, flop_unghr, flop_hseqr, flop_trevc, flop_total, flop_sum ); work[0] = MAGMA_D_MAKE( (double) optwrk, 0. ); return *info; } /* magma_dgeev */
/***************************************************************************//** Purpose ------- DSYEVD computes all eigenvalues and, optionally, eigenvectors of a real symmetric matrix A. If eigenvectors are desired, it uses a divide and conquer algorithm. The divide and conquer algorithm makes very mild assumptions about floating point arithmetic. It will work on machines with a guard digit in add/subtract, or on those binary machines without guard digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2. It could conceivably fail on hexadecimal or decimal machines without guard digits, but we know of none. Arguments --------- @param[in] ngpu INTEGER Number of GPUs to use. ngpu > 0. @param[in] jobz magma_vec_t - = MagmaNoVec: Compute eigenvalues only; - = MagmaVec: Compute eigenvalues and eigenvectors. @param[in] range magma_range_t - = MagmaRangeAll: all eigenvalues will be found. - = MagmaRangeV: all eigenvalues in the half-open interval (VL,VU] will be found. - = MagmaRangeI: the IL-th through IU-th eigenvalues will be found. @param[in] uplo magma_uplo_t - = MagmaUpper: Upper triangle of A is stored; - = MagmaLower: Lower triangle of A is stored. @param[in] n INTEGER The order of the matrix A. N >= 0. @param[in,out] A DOUBLE PRECISION array, dimension (LDA, N) On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading N-by-N upper triangular part of A contains the upper triangular part of the matrix A. If UPLO = MagmaLower, the leading N-by-N lower triangular part of A contains the lower triangular part of the matrix A. On exit, if JOBZ = MagmaVec, then if INFO = 0, A contains the orthonormal eigenvectors of the matrix A. If JOBZ = MagmaNoVec, then on exit the lower triangle (if UPLO=MagmaLower) or the upper triangle (if UPLO=MagmaUpper) of A, including the diagonal, is destroyed. @param[in] lda INTEGER The leading dimension of the array A. LDA >= max(1,N). @param[in] vl DOUBLE PRECISION @param[in] vu DOUBLE PRECISION If RANGE=MagmaRangeV, the lower and upper bounds of the interval to be searched for eigenvalues. VL < VU. Not referenced if RANGE = MagmaRangeAll or MagmaRangeI. @param[in] il INTEGER @param[in] iu INTEGER If RANGE=MagmaRangeI, the indices (in ascending order) of the smallest and largest eigenvalues to be returned. 1 <= IL <= IU <= N, if N > 0; IL = 1 and IU = 0 if N = 0. Not referenced if RANGE = MagmaRangeAll or MagmaRangeV. @param[out] m INTEGER The total number of eigenvalues found. 0 <= M <= N. If RANGE = MagmaRangeAll, M = N, and if RANGE = MagmaRangeI, M = IU-IL+1. @param[out] w DOUBLE PRECISION array, dimension (N) If INFO = 0, the eigenvalues in ascending order. @param[out] work (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK[0] returns the optimal LWORK. @param[in] lwork INTEGER The length of the array WORK. - If N <= 1, LWORK >= 1. - If JOBZ = MagmaNoVec and N > 1, LWORK >= 2*N + N*NB. - If JOBZ = MagmaVec and N > 1, LWORK >= max( 2*N + N*NB, 1 + 6*N + 2*N**2 ). NB can be obtained through magma_get_dsytrd_nb(N). \n If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] iwork (workspace) INTEGER array, dimension (MAX(1,LIWORK)) On exit, if INFO = 0, IWORK[0] returns the optimal LIWORK. @param[in] liwork INTEGER The dimension of the array IWORK. - If N <= 1, LIWORK >= 1. - If JOBZ = MagmaNoVec and N > 1, LIWORK >= 1. - If JOBZ = MagmaVec and N > 1, LIWORK >= 3 + 5*N. \n If LIWORK = -1, then a workspace query is assumed; the routine only calculates the optimal sizes of the WORK, RWORK and IWORK arrays, returns these values as the first entries of the WORK, RWORK and IWORK arrays, and no error message related to LWORK or LRWORK or LIWORK is issued by XERBLA. @param[out] info INTEGER - = 0: successful exit - < 0: if INFO = -i, the i-th argument had an illegal value - > 0: if INFO = i and JOBZ = MagmaNoVec, then the algorithm failed to converge; i off-diagonal elements of an intermediate tridiagonal form did not converge to zero; if INFO = i and JOBZ = MagmaVec, then the algorithm failed to compute an eigenvalue while working on the submatrix lying in rows and columns INFO/(N+1) through mod(INFO,N+1). Further Details --------------- Based on contributions by Jeff Rutter, Computer Science Division, University of California at Berkeley, USA Modified description of INFO. Sven, 16 Feb 05. @ingroup magma_heevdx *******************************************************************************/ extern "C" magma_int_t magma_dsyevdx_m( magma_int_t ngpu, magma_vec_t jobz, magma_range_t range, magma_uplo_t uplo, magma_int_t n, double *A, magma_int_t lda, double vl, double vu, magma_int_t il, magma_int_t iu, magma_int_t *m, double *w, double *work, magma_int_t lwork, #ifdef COMPLEX double *rwork, magma_int_t lrwork, #endif magma_int_t *iwork, magma_int_t liwork, magma_int_t *info) { const char* uplo_ = lapack_uplo_const( uplo ); const char* jobz_ = lapack_vec_const( jobz ); magma_int_t ione = 1; magma_int_t izero = 0; double d_one = 1.; double d__1; double eps; magma_int_t inde; double anrm; double rmin, rmax; double sigma; magma_int_t iinfo, lwmin; magma_int_t lower; magma_int_t wantz; magma_int_t indwk2, llwrk2; magma_int_t iscale; double safmin; double bignum; magma_int_t indtau; magma_int_t indwrk, liwmin; magma_int_t llwork; double smlnum; magma_int_t lquery; magma_int_t alleig, valeig, indeig; wantz = (jobz == MagmaVec); lower = (uplo == MagmaLower); alleig = (range == MagmaRangeAll); valeig = (range == MagmaRangeV); indeig = (range == MagmaRangeI); lquery = (lwork == -1 || liwork == -1); *info = 0; if (! (wantz || (jobz == MagmaNoVec))) { *info = -1; } else if (! (alleig || valeig || indeig)) { *info = -2; } else if (! (lower || (uplo == MagmaUpper))) { *info = -3; } else if (n < 0) { *info = -4; } else if (lda < max(1,n)) { *info = -6; } else { if (valeig) { if (n > 0 && vu <= vl) { *info = -8; } } else if (indeig) { if (il < 1 || il > max(1,n)) { *info = -9; } else if (iu < min(n,il) || iu > n) { *info = -10; } } } magma_int_t nb = magma_get_dsytrd_nb( n ); if ( n <= 1 ) { lwmin = 1; liwmin = 1; } else if ( wantz ) { lwmin = max( 2*n + n*nb, 1 + 6*n + 2*n*n ); liwmin = 3 + 5*n; } else { lwmin = 2*n + n*nb; liwmin = 1; } work[0] = magma_dmake_lwork( lwmin ); iwork[0] = liwmin; if ((lwork < lwmin) && !lquery) { *info = -14; } else if ((liwork < liwmin) && ! lquery) { *info = -16; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } if (n == 1) { w[0] = A[0]; if (wantz) { A[0] = 1.; } return *info; } /* Check if matrix is very small then just call LAPACK on CPU, no need for GPU */ if (n <= 128) { #ifdef ENABLE_DEBUG printf("--------------------------------------------------------------\n"); printf(" warning matrix too small N=%lld NB=%lld, calling lapack on CPU\n", (long long) n, (long long) nb ); printf("--------------------------------------------------------------\n"); #endif lapackf77_dsyevd(jobz_, uplo_, &n, A, &lda, w, work, &lwork, iwork, &liwork, info); return *info; } /* Get machine constants. */ safmin = lapackf77_dlamch("Safe minimum"); eps = lapackf77_dlamch("Precision"); smlnum = safmin / eps; bignum = 1. / smlnum; rmin = magma_dsqrt(smlnum); rmax = magma_dsqrt(bignum); /* Scale matrix to allowable range, if necessary. */ anrm = lapackf77_dlansy("M", uplo_, &n, A, &lda, work); iscale = 0; if (anrm > 0. && anrm < rmin) { iscale = 1; sigma = rmin / anrm; } else if (anrm > rmax) { iscale = 1; sigma = rmax / anrm; } if (iscale == 1) { lapackf77_dlascl(uplo_, &izero, &izero, &d_one, &sigma, &n, &n, A, &lda, info); } /* Call DSYTRD to reduce symmetric matrix to tridiagonal form. */ // dsytrd work: e (n) + tau (n) + llwork (n*nb) ==> 2n + n*nb // dstedx work: e (n) + tau (n) + z (n*n) + llwrk2 (1 + 4*n + n^2) ==> 1 + 6n + 2n^2 inde = 0; indtau = inde + n; indwrk = indtau + n; indwk2 = indwrk + n*n; llwork = lwork - indwrk; llwrk2 = lwork - indwk2; magma_timer_t time=0; timer_start( time ); magma_dsytrd_mgpu(ngpu, 1, uplo, n, A, lda, w, &work[inde], &work[indtau], &work[indwrk], llwork, &iinfo); timer_stop( time ); timer_printf( "time dsytrd = %6.2f\n", time ); /* For eigenvalues only, call DSTERF. For eigenvectors, first call DSTEDC to generate the eigenvector matrix, WORK(INDWRK), of the tridiagonal matrix, then call DORMTR to multiply it to the Householder transformations represented as Householder vectors in A. */ if (! wantz) { lapackf77_dsterf(&n, w, &work[inde], info); magma_dmove_eig(range, n, w, &il, &iu, vl, vu, m); } else { timer_start( time ); magma_dstedx_m(ngpu, range, n, vl, vu, il, iu, w, &work[inde], &work[indwrk], n, &work[indwk2], llwrk2, iwork, liwork, info); timer_stop( time ); timer_printf( "time dstedc = %6.2f\n", time ); timer_start( time ); magma_dmove_eig(range, n, w, &il, &iu, vl, vu, m); magma_dormtr_m(ngpu, MagmaLeft, uplo, MagmaNoTrans, n, *m, A, lda, &work[indtau], &work[indwrk + n * (il-1)], n, &work[indwk2], llwrk2, &iinfo); lapackf77_dlacpy("A", &n, m, &work[indwrk + n * (il-1)], &n, A, &lda); timer_stop( time ); timer_printf( "time dormtr + copy = %6.2f\n", time ); } /* If matrix was scaled, then rescale eigenvalues appropriately. */ if (iscale == 1) { d__1 = 1. / sigma; blasf77_dscal(&n, &d__1, w, &ione); } work[0] = magma_dmake_lwork( lwmin ); iwork[0] = liwmin; return *info; } /* magma_dsyevd_m */