void Scope::process(const double* inputs) { double max = 1.; m_decoder->process(inputs, m_matrix); max = fabs(m_matrix[cblas_idamax(m_number_of_points, m_matrix, 1)]); if(max > 1.) { cblas_dscal(m_number_of_points, (1. / max), m_matrix, 1.); } }
void Scope::process(const float* inputs) { double max = 1.; for(unsigned int i = 0; i < m_number_of_harmonics; i++) { m_harmonics[i] = inputs[i]; } m_decoder->process(m_harmonics, m_matrix); max = fabs(m_matrix[cblas_idamax(m_number_of_points, m_matrix, 1)]); if(max > 1.) { cblas_dscal(m_number_of_points, (1. / max), m_matrix, 1.); } }
VALUE rb_blas_ixamax(int argc, VALUE *argv, VALUE self) { Matrix *dx; int incx; int incy; int n; //char error_msg[64]; VALUE n_value, incx_value; rb_scan_args(argc, argv, "02", &incx_value, &n_value); Data_Get_Struct(self, Matrix, dx); if(incx_value == Qnil) incx = 1; else incx = NUM2INT(incx_value); if(n_value == Qnil) n = dx->nrows; else n = NUM2INT(n_value); if(dx == NULL || dx->ncols != 1) { //sprintf(error_msg, ); rb_raise(rb_eRuntimeError, "Self is not a Vector"); } switch(dx->data_type) { case Single_t: //s return INT2FIX(cblas_isamax(n , (float *)dx->data, incx)); case Double_t: //d return INT2FIX(cblas_idamax(n , (double *)dx->data, incx)); case Complex_t: //c return INT2FIX(cblas_icamax(n , dx->data, incx)); case Double_Complex_t: //z return INT2FIX(cblas_izamax(n , dx->data, incx)); default: //sprintf(error_msg, "Invalid data_type (%d) in Matrix", dx->data_type); rb_raise(rb_eRuntimeError, "Invalid data_type (%d) in Matrix", dx->data_type); return Qnil; //Never reaches here. } }
void getAbsMax(const gsl_vector* x, double* maxValue, int* maxPosition) { *maxPosition = cblas_idamax(x->size, x->data, x->stride); *maxValue = gsl_vector_get(x, *maxPosition); }
void test_amax (void) { { int N = 1; float X[] = { -0.388f }; int incX = -1; int expected = 0; int k; k = cblas_isamax(N, X, incX); gsl_test_int(k, expected, "samax(case 52)"); }; { int N = 1; double X[] = { 0.247 }; int incX = -1; int expected = 0; int k; k = cblas_idamax(N, X, incX); gsl_test_int(k, expected, "damax(case 53)"); }; { int N = 1; float X[] = { 0.704f, 0.665f }; int incX = -1; int expected = 0; int k; k = cblas_icamax(N, X, incX); gsl_test_int(k, expected, "camax(case 54)"); }; { int N = 1; double X[] = { -0.599, -0.758 }; int incX = -1; int expected = 0; int k; k = cblas_izamax(N, X, incX); gsl_test_int(k, expected, "zamax(case 55)"); }; { int N = 2; float X[] = { 0.909f, 0.037f }; int incX = 1; int expected = 0; int k; k = cblas_isamax(N, X, incX); gsl_test_int(k, expected, "samax(case 56)"); }; { int N = 2; double X[] = { 0.271, -0.426 }; int incX = 1; int expected = 1; int k; k = cblas_idamax(N, X, incX); gsl_test_int(k, expected, "damax(case 57)"); }; { int N = 2; float X[] = { -0.648f, 0.317f, 0.62f, 0.392f }; int incX = 1; int expected = 1; int k; k = cblas_icamax(N, X, incX); gsl_test_int(k, expected, "camax(case 58)"); }; { int N = 2; double X[] = { -0.789, 0.352, 0.562, 0.697 }; int incX = 1; int expected = 1; int k; k = cblas_izamax(N, X, incX); gsl_test_int(k, expected, "zamax(case 59)"); }; { int N = 2; float X[] = { 0.487f, 0.918f }; int incX = -1; int expected = 0; int k; k = cblas_isamax(N, X, incX); gsl_test_int(k, expected, "samax(case 60)"); }; { int N = 2; double X[] = { 0.537, 0.826 }; int incX = -1; int expected = 0; int k; k = cblas_idamax(N, X, incX); gsl_test_int(k, expected, "damax(case 61)"); }; { int N = 2; float X[] = { 0.993f, 0.172f, -0.825f, 0.873f }; int incX = -1; int expected = 0; int k; k = cblas_icamax(N, X, incX); gsl_test_int(k, expected, "camax(case 62)"); }; { int N = 2; double X[] = { 0.913, -0.436, -0.134, 0.129 }; int incX = -1; int expected = 0; int k; k = cblas_izamax(N, X, incX); gsl_test_int(k, expected, "zamax(case 63)"); }; }
int F77_idamax(const int *N, const double *X, const int *incX) { if (*N < 1 || *incX < 1) return(0); return (cblas_idamax(*N, X, *incX)+1); }
extern "C" magma_int_t magma_zgeev_m( char jobvl, char jobvr, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *W, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info ) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) COMPLEX*16 array, dimension (N) W contains the computed eigenvalues. VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (1+nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ #define vl(i,j) (vl + (i) + (j)*ldvl) #define vr(i,j) (vr + (i) + (j)*ldvr) magma_int_t c_one = 1; magma_int_t c_zero = 0; double d__1, d__2; magmaDoubleComplex z__1, z__2; magmaDoubleComplex tmp; double scl; double dum[1], eps; double anrm, cscale, bignum, smlnum; magma_int_t i, k, ilo, ihi; magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb; magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1]; char side[2] = {0, 0}; char jobvl_[2] = {jobvl, 0}; char jobvr_[2] = {jobvr, 0}; irwork = 0; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame( jobvl_, "V" ); wantvr = lapackf77_lsame( jobvr_, "V" ); if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) { *info = -1; } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -8; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -10; } /* Compute workspace */ nb = magma_get_zgehrd_nb( n ); if (*info == 0) { minwrk = (1+nb)*n; work[0] = MAGMA_Z_MAKE( minwrk, 0 ); if (lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } #if defined(Version3) || defined(Version4) || defined(Version5) magmaDoubleComplex *dT; if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif #if defined(Version4) || defined(Version5) magmaDoubleComplex *T; if (MAGMA_SUCCESS != magma_zmalloc_cpu( &T, nb*n )) { magma_free( dT ); *info = MAGMA_ERR_HOST_ALLOC; return *info; } #endif /* Get machine constants */ eps = lapackf77_dlamch( "P" ); smlnum = lapackf77_dlamch( "S" ); bignum = 1. / smlnum; lapackf77_dlabad( &smlnum, &bignum ); smlnum = magma_dsqrt( smlnum ) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum ); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_zlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr ); } /* Balance the matrix * (CWorkspace: none) * (RWorkspace: need N) */ ibal = 0; lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr ); /* Reduce to upper Hessenberg form * (CWorkspace: need 2*N, prefer N + N*NB) * (RWorkspace: none) */ itau = 0; iwrk = itau + n; liwrk = lwork - iwrk; #if defined(Version1) // Version 1 - LAPACK lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version2) // Version 2 - LAPACK consistent HRD magma_zgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, magma_zgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr ); #elif defined(Version4) || defined(Version5) // Version 4 - Multi-GPU, T on host magma_zgehrd_m( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, T, &ierr ); magma_zsetmatrix( nb, n, T, nb, dT, nb ); #endif if (wantvl) { /* Want left eigenvectors * Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl ); /* Generate unitary matrix in VL * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB) * (RWorkspace: none) */ #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_zunghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored magma_zunghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_zunghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VL * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W, vl, &ldvl, &work[iwrk], &liwrk, info ); if (wantvr) { /* Want left and right eigenvectors * Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_zlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr ); } } else if (wantvr) { /* Want right eigenvectors * Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_zlacpy( "L", &n, &n, A, &lda, vr, &ldvr ); /* Generate unitary matrix in VR * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB) * (RWorkspace: none) */ #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_zunghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored magma_zunghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_zunghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VR * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W, vr, &ldvr, &work[iwrk], &liwrk, info ); } else { /* Compute eigenvalues only * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, W, vr, &ldvr, &work[iwrk], &liwrk, info ); } /* If INFO > 0 from ZHSEQR, then quit */ if (*info > 0) { goto CLEANUP; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors * (CWorkspace: need 2*N) * (RWorkspace: need 2*N) */ irwork = ibal + n; lapackf77_ztrevc( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr ); } if (wantvl) { /* Undo balancing of left eigenvectors * (CWorkspace: none) * (RWorkspace: need N) */ lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, vl, &ldvl, &ierr ); /* Normalize left eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { scl = 1. / cblas_dznrm2( n, vl(0,i), 1 ); cblas_zdscal( n, scl, vl(0,i), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = MAGMA_Z_REAL( *vl(k,i) ); d__2 = MAGMA_Z_IMAG( *vl(k,i) ); rwork[irwork + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &rwork[irwork], 1 ); z__2 = MAGMA_Z_CNJG( *vl(k,i) ); d__1 = magma_dsqrt( rwork[irwork + k] ); MAGMA_Z_DSCALE( z__1, z__2, d__1 ); tmp = z__1; cblas_zscal( n, CBLAS_SADDR(tmp), vl(0,i), 1 ); d__1 = MAGMA_Z_REAL( *vl(k,i) ); z__1 = MAGMA_Z_MAKE( d__1, 0 ); *vl(k,i) = z__1; } } if (wantvr) { /* Undo balancing of right eigenvectors * (CWorkspace: none) * (RWorkspace: need N) */ lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, vr, &ldvr, &ierr ); /* Normalize right eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { scl = 1. / cblas_dznrm2( n, vr(0,i), 1 ); cblas_zdscal( n, scl, vr(0,i), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = MAGMA_Z_REAL( *vr(k,i) ); d__2 = MAGMA_Z_IMAG( *vr(k,i) ); rwork[irwork + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &rwork[irwork], 1 ); z__2 = MAGMA_Z_CNJG( *vr(k,i) ); d__1 = magma_dsqrt( rwork[irwork + k] ); MAGMA_Z_DSCALE( z__1, z__2, d__1 ); tmp = z__1; cblas_zscal( n, CBLAS_SADDR(tmp), vr(0,i), 1 ); d__1 = MAGMA_Z_REAL( *vr(k,i) ); z__1 = MAGMA_Z_MAKE( d__1, 0 ); *vr(k,i) = z__1; } } CLEANUP: /* Undo scaling if necessary */ if (scalea) { i__1 = n - (*info); i__2 = max( n - (*info), 1 ); lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, W + (*info), &i__2, &ierr ); if (*info > 0) { i__1 = ilo - 1; lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, W, &n, &ierr ); } } #if defined(Version3) || defined(Version4) || defined(Version5) magma_free( dT ); #endif #if defined(Version4) || defined(Version5) magma_free_cpu( T ); #endif return *info; } /* magma_zgeev */
static void CORE_dgetrf_rectil_rec(const PLASMA_desc A, int *IPIV, int *info, double *pivot, int thidx, int thcnt, int column, int width, int ft, int lt) { int ld, jp, n1, n2, lm, tmpM, piv_sf; int ip, j, it, i, ldft; int max_i, max_it, thwin; double zone = 1.0; double mzone = -1.0; double tmp1; double tmp2 = 0.; double pivval; double *Atop, *Atop2, *U, *L; double abstmp1; int offset = A.i; ldft = BLKLDD(A, 0); Atop = A(0, 0) + column * ldft; if ( width > 1 ) { /* Assumption: N = min( M, N ); */ n1 = width / 2; n2 = width - n1; Atop2 = Atop + n1 * ldft; CORE_dgetrf_rectil_rec( A, IPIV, info, pivot, thidx, thcnt, column, n1, ft, lt ); if ( *info != 0 ) return; if (thidx == 0) { /* Swap to the right */ int *lipiv = IPIV+column; int idxMax = column+n1; for (j = column; j < idxMax; ++j, ++lipiv) { ip = (*lipiv) - offset - 1; if ( ip != j ) { it = ip / A.mb; i = ip % A.mb; ld = BLKLDD(A, it); cblas_dswap(n2, Atop2 + j, ldft, A(it, 0) + (column+n1)*ld + i, ld ); } } /* Trsm on the upper part */ U = Atop2 + column; cblas_dtrsm( CblasColMajor, CblasLeft, CblasLower, CblasNoTrans, CblasUnit, n1, n2, (zone), Atop + column, ldft, U, ldft ); /* SIgnal to other threads that they can start update */ CORE_dbarrier_thread( thidx, thcnt ); pivval = *pivot; if ( pivval == 0.0 ) { *info = column+n1; return; } else { if ( fabs(pivval) >= sfmin ) { piv_sf = 1; pivval = 1.0 / pivval; } else { piv_sf = 0; } } /* First tile */ { L = Atop + column + n1; tmpM = min(ldft, A.m) - column - n1; /* Scale last column of L */ if ( piv_sf ) { cblas_dscal( tmpM, (pivval), L+(n1-1)*ldft, 1 ); } else { int i; Atop2 = L+(n1-1)*ldft; for( i=0; i < tmpM; i++, Atop2++) *Atop2 = *Atop2 / pivval; } /* Apply the GEMM */ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, tmpM, n2, n1, (mzone), L, ldft, U, ldft, (zone), U + n1, ldft ); /* Search Max in first column of U+n1 */ tmp2 = U[n1]; max_it = ft; max_i = cblas_idamax( tmpM, U+n1, 1 ) + n1; tmp1 = U[max_i]; abstmp1 = fabs(tmp1); max_i += column; } } else { pivval = *pivot; if ( pivval == 0.0 ) { *info = column+n1; return; } else { if ( fabs(pivval) >= sfmin ) { piv_sf = 1; pivval = 1.0 / pivval; } else { piv_sf = 0; } } ld = BLKLDD( A, ft ); L = A( ft, 0 ) + column * ld; lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb; U = Atop2 + column; /* First tile */ /* Scale last column of L */ if ( piv_sf ) { cblas_dscal( lm, (pivval), L+(n1-1)*ld, 1 ); } else { int i; Atop2 = L+(n1-1)*ld; for( i=0; i < lm; i++, Atop2++) *Atop2 = *Atop2 / pivval; } /* Wait for pivoting and triangular solve to be finished * before to really start the update */ CORE_dbarrier_thread( thidx, thcnt ); /* Apply the GEMM */ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, lm, n2, n1, (mzone), L, ld, U, ldft, (zone), L + n1*ld, ld ); /* Search Max in first column of L+n1*ld */ max_it = ft; max_i = cblas_idamax( lm, L+n1*ld, 1 ); tmp1 = L[n1*ld+max_i]; abstmp1 = fabs(tmp1); } /* Update the other blocks */ for( it = ft+1; it < lt; it++) { ld = BLKLDD( A, it ); L = A( it, 0 ) + column * ld; lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; /* Scale last column of L */ if ( piv_sf ) { cblas_dscal( lm, (pivval), L+(n1-1)*ld, 1 ); } else { int i; Atop2 = L+(n1-1)*ld; for( i=0; i < lm; i++, Atop2++) *Atop2 = *Atop2 / pivval; } /* Apply the GEMM */ cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, lm, n2, n1, (mzone), L, ld, U, ldft, (zone), L + n1*ld, ld ); /* Search the max on the first column of L+n1*ld */ jp = cblas_idamax( lm, L+n1*ld, 1 ); if ( fabs( L[n1*ld+jp] ) > abstmp1 ) { tmp1 = L[n1*ld+jp]; abstmp1 = fabs(tmp1); max_i = jp; max_it = it; } } jp = offset + max_it*A.mb + max_i; CORE_damax1_thread( tmp1, thidx, thcnt, &thwin, &tmp2, pivot, jp + 1, IPIV + column + n1 ); if ( thidx == 0 ) { U[n1] = *pivot; /* all threads have the pivot element: no need for synchronization */ } if (thwin == thidx) { /* the thread that owns the best pivot */ if ( jp-offset != column+n1 ) /* if there is a need to exchange the pivot */ { ld = BLKLDD(A, max_it); Atop2 = A( max_it, 0 ) + (column + n1 )* ld + max_i; *Atop2 = tmp2; } } CORE_dgetrf_rectil_rec( A, IPIV, info, pivot, thidx, thcnt, column+n1, n2, ft, lt ); if ( *info != 0 ) return; if ( thidx == 0 ) { /* Swap to the left */ int *lipiv = IPIV+column+n1; int idxMax = column+width; for (j = column+n1; j < idxMax; ++j, ++lipiv) { ip = (*lipiv) - offset - 1; if ( ip != j ) { it = ip / A.mb; i = ip % A.mb; ld = BLKLDD(A, it); cblas_dswap(n1, Atop + j, ldft, A(it, 0) + column*ld + i, ld ); } } } } else if ( width == 1 ) { /* Search maximum for column 0 */ if ( column == 0 ) { if ( thidx == 0 ) tmp2 = Atop[column]; /* First tmp1 */ ld = BLKLDD(A, ft); Atop2 = A( ft, 0 ); lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb; max_it = ft; max_i = cblas_idamax( lm, Atop2, 1 ); tmp1 = Atop2[max_i]; abstmp1 = fabs(tmp1); /* Update */ for( it = ft+1; it < lt; it++) { Atop2= A( it, 0 ); lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; jp = cblas_idamax( lm, Atop2, 1 ); if ( fabs(Atop2[jp]) > abstmp1 ) { tmp1 = Atop2[jp]; abstmp1 = fabs(tmp1); max_i = jp; max_it = it; } } jp = offset + max_it*A.mb + max_i; CORE_damax1_thread( tmp1, thidx, thcnt, &thwin, &tmp2, pivot, jp + 1, IPIV + column ); if ( thidx == 0 ) { Atop[0] = *pivot; /* all threads have the pivot element: no need for synchronization */ } if (thwin == thidx) { /* the thread that owns the best pivot */ if ( jp-offset != 0 ) /* if there is a need to exchange the pivot */ { Atop2 = A( max_it, 0 ) + max_i; *Atop2 = tmp2; } } } CORE_dbarrier_thread( thidx, thcnt ); /* If it is the last column, we just scale */ if ( column == (min(A.m, A.n))-1 ) { pivval = *pivot; if ( pivval != 0.0 ) { if ( thidx == 0 ) { if ( fabs(pivval) >= sfmin ) { pivval = 1.0 / pivval; /* * We guess than we never enter the function with m == A.mt-1 * because it means that there is only one thread */ lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb; cblas_dscal( lm - column - 1, (pivval), Atop+column+1, 1 ); for( it = ft+1; it < lt; it++) { ld = BLKLDD(A, it); Atop2 = A( it, 0 ) + column * ld; lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; cblas_dscal( lm, (pivval), Atop2, 1 ); } } else { /* * We guess than we never enter the function with m == A.mt-1 * because it means that there is only one thread */ int i; Atop2 = Atop + column + 1; lm = ft == A.mt-1 ? A.m - ft * A.mb : A.mb; for( i=0; i < lm-column-1; i++, Atop2++) *Atop2 = *Atop2 / pivval; for( it = ft+1; it < lt; it++) { ld = BLKLDD(A, it); Atop2 = A( it, 0 ) + column * ld; lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; for( i=0; i < lm; i++, Atop2++) *Atop2 = *Atop2 / pivval; } } } else { if ( fabs(pivval) >= sfmin ) { pivval = 1.0 / pivval; for( it = ft; it < lt; it++) { ld = BLKLDD(A, it); Atop2 = A( it, 0 ) + column * ld; lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; cblas_dscal( lm, (pivval), Atop2, 1 ); } } else { /* * We guess than we never enter the function with m == A.mt-1 * because it means that there is only one thread */ int i; for( it = ft; it < lt; it++) { ld = BLKLDD(A, it); Atop2 = A( it, 0 ) + column * ld; lm = it == A.mt-1 ? A.m - it * A.mb : A.mb; for( i=0; i < lm; i++, Atop2++) *Atop2 = *Atop2 / pivval; } } } } else { *info = column + 1; return; } } } }
extern "C" magma_int_t magma_dgeev( char jobvl, char jobvr, magma_int_t n, double *A, magma_int_t lda, double *WR, double *WI, double *vl, magma_int_t ldvl, double *vr, magma_int_t ldvr, double *work, magma_int_t lwork, magma_int_t *info ) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= DGEEV computes for an N-by-N real nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**T * A = lambda(j) * u(j)**T where u(j)**T denotes the transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) DOUBLE PRECISION array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). WR (output) DOUBLE PRECISION array, dimension (N) WI (output) DOUBLE PRECISION array, dimension (N) WR and WI contain the real and imaginary parts, respectively, of the computed eigenvalues. Complex conjugate pairs of eigenvalues appear consecutively with the eigenvalue having the positive imaginary part first. VL (output) DOUBLE PRECISION array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) DOUBLE PRECISION array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (2+nb)*N. For optimal performance, LWORK >= (2+2*nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ #define vl(i,j) (vl + (i) + (j)*ldvl) #define vr(i,j) (vr + (i) + (j)*ldvr) magma_int_t c_one = 1; magma_int_t c_zero = 0; double d__1, d__2; double r, cs, sn, scl; double dum[1], eps; double anrm, cscale, bignum, smlnum; magma_int_t i, k, ilo, ihi; magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb; magma_int_t scalea, minwrk, lquery, wantvl, wantvr, select[1]; char side[2] = {0, 0}; char jobvl_[2] = {jobvl, 0}; char jobvr_[2] = {jobvr, 0}; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame( jobvl_, "V" ); wantvr = lapackf77_lsame( jobvr_, "V" ); if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) { *info = -1; } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -9; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -11; } /* Compute workspace */ nb = magma_get_dgehrd_nb( n ); if (*info == 0) { minwrk = (2+nb)*n; work[0] = MAGMA_D_MAKE( (double) minwrk, 0. ); if (lwork < minwrk && ! lquery) { *info = -13; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } #if defined(VERSION3) double *dT; if (MAGMA_SUCCESS != magma_dmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif /* Get machine constants */ eps = lapackf77_dlamch( "P" ); smlnum = lapackf77_dlamch( "S" ); bignum = 1. / smlnum; lapackf77_dlabad( &smlnum, &bignum ); smlnum = magma_dsqrt( smlnum ) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_dlange( "M", &n, &n, A, &lda, dum ); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_dlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr ); } /* Balance the matrix * (Workspace: need N) */ ibal = 0; lapackf77_dgebal( "B", &n, A, &lda, &ilo, &ihi, &work[ibal], &ierr ); /* Reduce to upper Hessenberg form * (Workspace: need 3*N, prefer 2*N + N*NB) */ itau = ibal + n; iwrk = itau + n; liwrk = lwork - iwrk; #if defined(VERSION1) // Version 1 - LAPACK lapackf77_dgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION2) // Version 2 - LAPACK consistent HRD magma_dgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored, magma_dgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr ); #endif if (wantvl) { /* Want left eigenvectors * Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_dlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl ); /* Generate orthogonal matrix in VL * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */ #if defined(VERSION1) || defined(VERSION2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VL * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI, vl, &ldvl, &work[iwrk], &liwrk, info ); if (wantvr) { /* Want left and right eigenvectors * Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_dlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr ); } } else if (wantvr) { /* Want right eigenvectors * Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_dlacpy( "L", &n, &n, A, &lda, vr, &ldvr ); /* Generate orthogonal matrix in VR * (Workspace: need 3*N-1, prefer 2*N + (N-1)*NB) */ #if defined(VERSION1) || defined(VERSION2) // Version 1 & 2 - LAPACK lapackf77_dorghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(VERSION3) // Version 3 - LAPACK consistent MAGMA HRD + T matrices stored magma_dorghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VR * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, WR, WI, vr, &ldvr, &work[iwrk], &liwrk, info ); } else { /* Compute eigenvalues only * (Workspace: need N+1, prefer N+HSWORK (see comments) ) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_dhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, WR, WI, vr, &ldvr, &work[iwrk], &liwrk, info ); } /* If INFO > 0 from DHSEQR, then quit */ if (*info > 0) { goto CLEANUP; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors * (Workspace: need 4*N) */ liwrk = lwork - iwrk; #if TREVC_VERSION == 1 lapackf77_dtrevc( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &ierr ); #elif TREVC_VERSION == 2 lapackf77_dtrevc3( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &liwrk, &ierr ); #endif } if (wantvl) { /* Undo balancing of left eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "L", &n, &ilo, &ihi, &work[ibal], &n, vl, &ldvl, &ierr ); /* Normalize left eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( WI[i] == 0. ) { scl = 1. / cblas_dnrm2( n, vl(0,i), 1 ); cblas_dscal( n, scl, vl(0,i), 1 ); } else if ( WI[i] > 0. ) { d__1 = cblas_dnrm2( n, vl(0,i), 1 ); d__2 = cblas_dnrm2( n, vl(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); cblas_dscal( n, scl, vl(0,i), 1 ); cblas_dscal( n, scl, vl(0,i+1), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *vl(k,i); d__2 = *vl(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &work[iwrk], 1 ); lapackf77_dlartg( vl(k,i), vl(k,i+1), &cs, &sn, &r ); cblas_drot( n, vl(0,i), 1, vl(0,i+1), 1, cs, sn ); *vl(k,i+1) = 0.; } } } if (wantvr) { /* Undo balancing of right eigenvectors * (Workspace: need N) */ lapackf77_dgebak( "B", "R", &n, &ilo, &ihi, &work[ibal], &n, vr, &ldvr, &ierr ); /* Normalize right eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { if ( WI[i] == 0. ) { scl = 1. / cblas_dnrm2( n, vr(0,i), 1 ); cblas_dscal( n, scl, vr(0,i), 1 ); } else if ( WI[i] > 0. ) { d__1 = cblas_dnrm2( n, vr(0,i), 1 ); d__2 = cblas_dnrm2( n, vr(0,i+1), 1 ); scl = 1. / lapackf77_dlapy2( &d__1, &d__2 ); cblas_dscal( n, scl, vr(0,i), 1 ); cblas_dscal( n, scl, vr(0,i+1), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = *vr(k,i); d__2 = *vr(k,i+1); work[iwrk + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &work[iwrk], 1 ); lapackf77_dlartg( vr(k,i), vr(k,i+1), &cs, &sn, &r ); cblas_drot( n, vr(0,i), 1, vr(0,i+1), 1, cs, sn ); *vr(k,i+1) = 0.; } } } CLEANUP: /* Undo scaling if necessary */ if (scalea) { i__1 = n - (*info); i__2 = max( n - (*info), 1 ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WR + (*info), &i__2, &ierr ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WI + (*info), &i__2, &ierr ); if (*info > 0) { i__1 = ilo - 1; lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WR, &n, &ierr ); lapackf77_dlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, WI, &n, &ierr ); } } #if defined(VERSION3) magma_free( dT ); #endif return *info; } /* magma_dgeev */
void dgetf2( long m, long n, double a[], long lda, long ipiv[], long *info ) { /** * -- LAPACK routine (version 1.1) -- * Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., * Courant Institute, Argonne National Lab, and Rice University * June 30, 1992 * * .. Scalar Arguments ..*/ /** .. * .. Array Arguments ..*/ #undef ipiv_1 #define ipiv_1(a1) ipiv[a1-1] #undef a_2 #define a_2(a1,a2) a[a1-1+lda*(a2-1)] /** .. * * Purpose * ======= * * DGETF2 computes an LU factorization of a general m-by-n matrix A * using partial pivoting with row interchanges. * * The factorization has the form * A = P * L * U * where P is a permutation matrix, L is lower triangular with unit * diagonal elements (lower trapezoidal if m > n), and U is upper * triangular (upper trapezoidal if m < n). * * This is the right-looking Level 2 BLAS version of the algorithm. * * Arguments * ========= * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. * * N (input) INTEGER * The number of columns of the matrix A. N >= 0. * * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) * On entry, the m by n matrix to be factored. * On exit, the factors L and U from the factorization * A = P*L*U; the unit diagonal elements of L are not stored. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,M). * * IPIV (output) INTEGER array, dimension (min(M,N)) * The pivot indices; for 1 <= i <= min(M,N), row i of the * matrix was interchanged with row IPIV(i). * * INFO (output) INTEGER * = 0: successful exit * < 0: if INFO = -k, the k-th argument had an illegal value * > 0: if INFO = k, U(k,k) is exactly zero. The factorization * has been completed, but the factor U is exactly * singular, and division by zero will occur if it is used * to solve a system of equations. * * ===================================================================== * * .. Parameters ..*/ #undef one #define one 1.0e+0 #undef zero #define zero 0.0e+0 /** .. * .. Local Scalars ..*/ long j, jp; /** .. * .. Intrinsic Functions ..*/ /* intrinsic max, min;*/ /** .. * .. Executable Statements .. * * Test the input parameters. **/ /*-----implicit-declarations-----*/ /*-----end-of-declarations-----*/ *info = 0; if( m<0 ) { *info = -1; } else if( n<0 ) { *info = -2; } else if( lda<max( 1, m ) ) { *info = -4; } if( *info!=0 ) { xerbla( "dgetf2", -*info ); return; } /** * Quick return if possible **/ if( m==0 || n==0 ) return; for (j=1 ; j<=min( m, n ) ; j+=1) { /** * Find pivot and test for singularity. **/ jp = j + cblas_idamax( m-j+1, &a_2( j, j ), 1 ); ipiv_1( j ) = jp; if( a_2( jp, j )!=zero ) { /** * Apply the interchange to columns 1:N. **/ if( jp!=j ) cblas_dswap( n, &a_2( j, 1 ), lda, &a_2( jp, 1 ), lda ); /** * Compute elements J+1:M of J-th column. **/ if( j<m ) cblas_dscal( m-j, one / a_2( j, j ), &a_2( j+1, j ), 1 ); } else if( *info==0 ) { *info = j; } if( j<min( m, n ) ) { /** * Update trailing submatrix. **/ cblas_dger(CblasColMajor, m-j, n-j, -one, &a_2( j+1, j ), 1, &a_2( j, j+1 ), lda, &a_2( j+1, j+1 ), lda ); } } return; /** * End of DGETF2 **/ }
// // Overloaded function for dispatching to // * CBLAS backend, and // * double value-type. // inline std::ptrdiff_t iamax( const int n, const double* x, const int incx ) { return cblas_idamax( n, x, incx ); }
int CORE_dtstrf(int M, int N, int IB, int NB, double *U, int LDU, double *A, int LDA, double *L, int LDL, int *IPIV, double *WORK, int LDWORK, int *INFO) { static double zzero = 0.0; static double mzone =-1.0; double alpha; int i, j, ii, sb; int im, ip; /* Check input arguments */ *INFO = 0; if (M < 0) { coreblas_error(1, "Illegal value of M"); return -1; } if (N < 0) { coreblas_error(2, "Illegal value of N"); return -2; } if (IB < 0) { coreblas_error(3, "Illegal value of IB"); return -3; } if ((LDU < max(1,NB)) && (NB > 0)) { coreblas_error(6, "Illegal value of LDU"); return -6; } if ((LDA < max(1,M)) && (M > 0)) { coreblas_error(8, "Illegal value of LDA"); return -8; } if ((LDL < max(1,IB)) && (IB > 0)) { coreblas_error(10, "Illegal value of LDL"); return -10; } /* Quick return */ if ((M == 0) || (N == 0) || (IB == 0)) return PLASMA_SUCCESS; /* Set L to 0 */ memset(L, 0, LDL*N*sizeof(double)); ip = 0; for (ii = 0; ii < N; ii += IB) { sb = min(N-ii, IB); for (i = 0; i < sb; i++) { im = cblas_idamax(M, &A[LDA*(ii+i)], 1); IPIV[ip] = ii+i+1; if (fabs(A[LDA*(ii+i)+im]) > fabs(U[LDU*(ii+i)+ii+i])) { /* * Swap behind. */ cblas_dswap(i, &L[LDL*ii+i], LDL, &WORK[im], LDWORK ); /* * Swap ahead. */ cblas_dswap(sb-i, &U[LDU*(ii+i)+ii+i], LDU, &A[LDA*(ii+i)+im], LDA ); /* * Set IPIV. */ IPIV[ip] = NB + im + 1; for (j = 0; j < i; j++) { A[LDA*(ii+j)+im] = zzero; } } if ((*INFO == 0) && (fabs(A[LDA*(ii+i)+im]) == zzero) && (fabs(U[LDU*(ii+i)+ii+i]) == zzero)) { *INFO = ii+i+1; } alpha = ((double)1. / U[LDU*(ii+i)+ii+i]); cblas_dscal(M, (alpha), &A[LDA*(ii+i)], 1); cblas_dcopy(M, &A[LDA*(ii+i)], 1, &WORK[LDWORK*i], 1); cblas_dger( CblasColMajor, M, sb-i-1, (mzone), &A[LDA*(ii+i)], 1, &U[LDU*(ii+i+1)+ii+i], LDU, &A[LDA*(ii+i+1)], LDA ); ip = ip+1; } /* * Apply the subpanel to the rest of the panel. */ if(ii+i < N) { for(j = ii; j < ii+sb; j++) { if (IPIV[j] <= NB) { IPIV[j] = IPIV[j] - ii; } } CORE_dssssm( NB, N-(ii+sb), M, N-(ii+sb), sb, sb, &U[LDU*(ii+sb)+ii], LDU, &A[LDA*(ii+sb)], LDA, &L[LDL*ii], LDL, WORK, LDWORK, &IPIV[ii]); for(j = ii; j < ii+sb; j++) { if (IPIV[j] <= NB) { IPIV[j] = IPIV[j] + ii; } } } } return PLASMA_SUCCESS; }
* * N (local input) const int * On entry, N specifies the length of the vector x. N must be * at least zero. * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * --------------------------------------------------------------------- */ START_TRACE( IDAMAX ) int imax = 0; imax = cblas_idamax( N, X, INCX ); END_TRACE return( imax ); /* * End of HPL_idamax */ } #endif
double vector_t::max_abs() const { CBLAS_INDEX index = cblas_idamax(len, data.get(), inc); return (*this)[index]; }
static double cblas_dnrm_infty( const int N, const double *X, const int incX ) { return fabs( X[ cblas_idamax( N, X, incX ) ] ); }
JNIEXPORT jint JNICALL Java_uncomplicate_neanderthal_CBLAS_idamax (JNIEnv *env, jclass clazz, jint N, jobject X, jint offsetX, jint incX) { double *cX = (double *) (*env)->GetDirectBufferAddress(env, X); return cblas_idamax(N, cX + offsetX, incX); };
extern "C" magma_int_t magma_zgeev(magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *geev_w_array, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info, magma_queue_t queue) { /* -- clMAGMA (version 1.0.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver September 2012 Purpose ======= ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) COMPLEX*16 array, dimension (N) W contains the computed eigenvalues. VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (1+nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ magma_int_t c__1 = 1; magma_int_t c__0 = 0; magma_int_t a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; double d__1, d__2; magmaDoubleComplex z__1, z__2; magma_int_t i__, k, ihi; double scl; magma_int_t ilo; double dum[1], eps; magmaDoubleComplex tmp; magma_int_t ibal; double anrm; magma_int_t ierr, itau, iwrk, nout; magma_int_t scalea; double cscale; magma_int_t select[1]; double bignum; magma_int_t minwrk; magma_int_t wantvl; double smlnum; magma_int_t irwork; magma_int_t lquery, wantvr; magma_int_t nb = 0; magmaDoubleComplex_ptr dT; //magma_timestr_t start, end; char side[2] = {0, 0}; magma_vec_t jobvl_ = jobvl; magma_vec_t jobvr_ = jobvr; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame(lapack_const(jobvl_), "V"); wantvr = lapackf77_lsame(lapack_const(jobvr_), "V"); if (! wantvl && ! lapackf77_lsame(lapack_const(jobvl_), "N")) { *info = -1; } else if (! wantvr && ! lapackf77_lsame(lapack_const(jobvr_), "N")) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -8; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -10; } /* Compute workspace */ if (*info == 0) { nb = magma_get_zgehrd_nb(n); minwrk = (1+nb)*n; work[0] = MAGMA_Z_MAKE((double) minwrk, 0.); if (lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } // if eigenvectors are needed #if defined(VERSION3) if (MAGMA_SUCCESS != magma_malloc(&dT, nb*n*sizeof(magmaDoubleComplex) )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif a_dim1 = lda; a_offset = 1 + a_dim1; a -= a_offset; vl_dim1 = ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; --rwork; /* Get machine constants */ eps = lapackf77_dlamch("P"); smlnum = lapackf77_dlamch("S"); bignum = 1. / smlnum; lapackf77_dlabad(&smlnum, &bignum); smlnum = magma_dsqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_zlange("M", &n, &n, &a[a_offset], &lda, dum); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_zlascl("G", &c__0, &c__0, &anrm, &cscale, &n, &n, &a[a_offset], &lda, & ierr); } /* Balance the matrix (CWorkspace: none) (RWorkspace: need N) */ ibal = 1; lapackf77_zgebal("B", &n, &a[a_offset], &lda, &ilo, &ihi, &rwork[ibal], &ierr); /* Reduce to upper Hessenberg form (CWorkspace: need 2*N, prefer N+N*NB) (RWorkspace: none) */ itau = 1; iwrk = itau + n; i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) /* * Version 1 - LAPACK */ lapackf77_zgehrd(&n, &ilo, &ihi, &a[a_offset], &lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION2) /* * Version 2 - LAPACK consistent HRD */ magma_zgehrd2(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, */ magma_zgehrd(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], i__1, dT, 0, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zgehrd = %5.2f sec\n", GetTimerValue(start,end)/1000.); if (wantvl) { /* Want left eigenvectors Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_zlacpy(MagmaLowerStr, &n, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl); /* Generate unitary matrix in VL (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_zunghr(&n, &ilo, &ihi, &vl[vl_offset], &ldvl, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_zunghr(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], dT, 0, nb, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* Perform QR iteration, accumulating Schur vectors in VL (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vl[vl_offset], &ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_zlacpy("F", &n, &n, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr); } } else if (wantvr) { /* Want right eigenvectors Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_zlacpy("L", &n, &n, &a[a_offset], &lda, &vr[vr_offset], &ldvr); /* Generate unitary matrix in VR (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_zunghr(&n, &ilo, &ihi, &vr[vr_offset], &ldvr, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_zunghr(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], dT, 0, nb, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* Perform QR iteration, accumulating Schur vectors in VR (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } else { /* Compute eigenvalues only (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("E", "N", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from ZHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors (CWorkspace: need 2*N) (RWorkspace: need 2*N) */ irwork = ibal + n; lapackf77_ztrevc(side, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr); } if (wantvl) { /* Undo balancing of left eigenvectors (CWorkspace: none) (RWorkspace: need N) */ lapackf77_zgebak("B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, &vl[vl_offset], &ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { scl = 1. / cblas_dznrm2(n, &vl[i__ * vl_dim1 + 1], 1); cblas_zdscal(n, scl, &vl[i__ * vl_dim1 + 1], 1); i__2 = n; for (k = 1; k <= i__2; ++k) { i__3 = k + i__ * vl_dim1; /* Computing 2nd power */ d__1 = MAGMA_Z_REAL(vl[i__3]); /* Computing 2nd power */ d__2 = MAGMA_Z_IMAG(vl[k + i__ * vl_dim1]); rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = cblas_idamax(n, &rwork[irwork], 1)+1; z__2 = MAGMA_Z_CNJG(vl[k + i__ * vl_dim1]); d__1 = magma_dsqrt(rwork[irwork + k - 1]); MAGMA_Z_DSCALE(z__1, z__2, d__1); tmp = z__1; cblas_zscal(n, CBLAS_SADDR(tmp), &vl[i__ * vl_dim1 + 1], 1); i__2 = k + i__ * vl_dim1; i__3 = k + i__ * vl_dim1; d__1 = MAGMA_Z_REAL(vl[i__3]); MAGMA_Z_SET2REAL(z__1, d__1); vl[i__2] = z__1; } } if (wantvr) { /* Undo balancing of right eigenvectors (CWorkspace: none) (RWorkspace: need N) */ lapackf77_zgebak("B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, &vr[vr_offset], &ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { scl = 1. / cblas_dznrm2(n, &vr[i__ * vr_dim1 + 1], 1); cblas_zdscal(n, scl, &vr[i__ * vr_dim1 + 1], 1); i__2 = n; for (k = 1; k <= i__2; ++k) { i__3 = k + i__ * vr_dim1; /* Computing 2nd power */ d__1 = MAGMA_Z_REAL(vr[i__3]); /* Computing 2nd power */ d__2 = MAGMA_Z_IMAG(vr[k + i__ * vr_dim1]); rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = cblas_idamax(n, &rwork[irwork], 1)+1; z__2 = MAGMA_Z_CNJG(vr[k + i__ * vr_dim1]); d__1 = magma_dsqrt(rwork[irwork + k - 1]); MAGMA_Z_DSCALE(z__1, z__2, d__1); tmp = z__1; cblas_zscal(n, CBLAS_SADDR(tmp), &vr[i__ * vr_dim1 + 1], 1); i__2 = k + i__ * vr_dim1; i__3 = k + i__ * vr_dim1; d__1 = MAGMA_Z_REAL(vr[i__3]); MAGMA_Z_SET2REAL(z__1, d__1); vr[i__2] = z__1; } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = n - *info; /* Computing MAX */ i__3 = n - *info; i__2 = max(i__3,1); lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, geev_w_array + *info, &i__2, &ierr); if (*info > 0) { i__1 = ilo - 1; lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, geev_w_array, &n, &ierr); } } #if defined(VERSION3) magma_free( dT ); #endif return *info; } /* magma_zgeev */
int STARPU_IDAMAX (const int n, double *X, const int incX) { int retVal; retVal = cblas_idamax(n, X, incX); return retVal; }