static void MakeHEDiagDom (enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, int N, TYPE *A, int lda) /* * Makes hermitian matrix diagonally dominant */ { TYPE *aa, t0; int j; const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT); if (Order == CblasRowMajor) { if (Uplo == CblasLower) Uplo = CblasUpper; else Uplo = CblasLower; } if (Uplo == CblasLower) { for (j=0; j < N; j++, A += ldap1) { #ifdef TREAL *A = 1.0 + cblas_asum(N-j, A, 1); *A += cblas_asum(j, A-lda*j, lda); #elif defined(SCPLX) *A = 1.0 + cblas_scasum(N-j, A, 1); *A += cblas_scasum(j, A-lda2*j, lda); #else *A = 1.0 + cblas_dzasum(N-j, A, 1); *A += cblas_dzasum(j, A-lda2*j, lda); #endif #ifdef TCPLX A[1] = ATL_rzero; #endif } } else /* Upper */ { for (j=0; j < N; j++, A += ldap1) { #ifdef TREAL *A = 1.0 + cblas_asum(N-j, A, lda); *A += cblas_asum(j, A-j, 1); #else #ifdef SCPLX *A = 1.0 + cblas_scasum(N-j, A, lda); *A += cblas_scasum(j, A-j*2, 1); #else *A = 1.0 + cblas_dzasum(N-j, A, lda); *A += cblas_dzasum(j, A-j*2, 1); #endif A[1] = ATL_rzero; #endif } } }
float magma_cblas_scasum( magma_int_t n, const magmaFloatComplex *x, magma_int_t incx ) { return cblas_scasum( n, x, incx ); }
void test_asum (void) { const double flteps = 1e-4, dbleps = 1e-6; { int N = 1; float X[] = { 0.239f }; int incX = -1; float expected = 0.0f; float f; f = cblas_sasum(N, X, incX); gsl_test_rel(f, expected, flteps, "sasum(case 40)"); }; { int N = 1; double X[] = { -0.413 }; int incX = -1; double expected = 0; double f; f = cblas_dasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dasum(case 41)"); }; { int N = 1; float X[] = { 0.1f, 0.017f }; int incX = -1; float expected = 0.0f; float f; f = cblas_scasum(N, X, incX); gsl_test_rel(f, expected, flteps, "scasum(case 42)"); }; { int N = 1; double X[] = { -0.651, 0.079 }; int incX = -1; double expected = 0; double f; f = cblas_dzasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dzasum(case 43)"); }; { int N = 2; float X[] = { 0.899f, -0.72f }; int incX = 1; float expected = 1.619f; float f; f = cblas_sasum(N, X, incX); gsl_test_rel(f, expected, flteps, "sasum(case 44)"); }; { int N = 2; double X[] = { 0.271, -0.012 }; int incX = 1; double expected = 0.283; double f; f = cblas_dasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dasum(case 45)"); }; { int N = 2; float X[] = { -0.567f, -0.645f, 0.098f, 0.256f }; int incX = 1; float expected = 1.566f; float f; f = cblas_scasum(N, X, incX); gsl_test_rel(f, expected, flteps, "scasum(case 46)"); }; { int N = 2; double X[] = { -0.046, -0.671, -0.323, 0.785 }; int incX = 1; double expected = 1.825; double f; f = cblas_dzasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dzasum(case 47)"); }; { int N = 2; float X[] = { 0.169f, 0.833f }; int incX = -1; float expected = 0.0f; float f; f = cblas_sasum(N, X, incX); gsl_test_rel(f, expected, flteps, "sasum(case 48)"); }; { int N = 2; double X[] = { -0.586, -0.486 }; int incX = -1; double expected = 0; double f; f = cblas_dasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dasum(case 49)"); }; { int N = 2; float X[] = { -0.314f, -0.318f, -0.835f, -0.807f }; int incX = -1; float expected = 0.0f; float f; f = cblas_scasum(N, X, incX); gsl_test_rel(f, expected, flteps, "scasum(case 50)"); }; { int N = 2; double X[] = { -0.927, 0.152, -0.554, -0.844 }; int incX = -1; double expected = 0; double f; f = cblas_dzasum(N, X, incX); gsl_test_rel(f, expected, dbleps, "dzasum(case 51)"); }; }
float scasum_(int *N, void *SX, int *INCX) { return cblas_scasum(*N, SX, *INCX); }
magma_int_t magma_ctrevc3_mt( magma_side_t side, magma_vec_t howmany, magma_int_t *select, // logical in Fortran magma_int_t n, magmaFloatComplex *T, magma_int_t ldt, magmaFloatComplex *VL, magma_int_t ldvl, magmaFloatComplex *VR, magma_int_t ldvr, magma_int_t mm, magma_int_t *mout, magmaFloatComplex *work, magma_int_t lwork, float *rwork, magma_int_t *info ) { #define T(i,j) ( T + (i) + (j)*ldt ) #define VL(i,j) (VL + (i) + (j)*ldvl) #define VR(i,j) (VR + (i) + (j)*ldvr) #define work(i,j) (work + (i) + (j)*n) // .. Parameters .. const magmaFloatComplex c_zero = MAGMA_C_ZERO; const magmaFloatComplex c_one = MAGMA_C_ONE; const magma_int_t nbmin = 16, nbmax = 128; const magma_int_t ione = 1; // .. Local Scalars .. magma_int_t allv, bothv, leftv, over, rightv, somev; magma_int_t i, ii, is, j, k, ki, iv, n2, nb, nb2, version; float ovfl, remax, smin, smlnum, ulp, unfl; // Decode and test the input parameters bothv = (side == MagmaBothSides); rightv = (side == MagmaRight) || bothv; leftv = (side == MagmaLeft ) || bothv; allv = (howmany == MagmaAllVec); over = (howmany == MagmaBacktransVec); somev = (howmany == MagmaSomeVec); // Set mout to the number of columns required to store the selected // eigenvectors. if ( somev ) { *mout = 0; for( j=0; j < n; ++j ) { if ( select[j] ) { *mout += 1; } } } else { *mout = n; } *info = 0; if ( ! rightv && ! leftv ) *info = -1; else if ( ! allv && ! over && ! somev ) *info = -2; else if ( n < 0 ) *info = -4; else if ( ldt < max( 1, n ) ) *info = -6; else if ( ldvl < 1 || ( leftv && ldvl < n ) ) *info = -8; else if ( ldvr < 1 || ( rightv && ldvr < n ) ) *info = -10; else if ( mm < *mout ) *info = -11; else if ( lwork < max( 1, 2*n ) ) *info = -14; if ( *info != 0 ) { magma_xerbla( __func__, -(*info) ); return *info; } // Quick return if possible. if ( n == 0 ) { return *info; } // Use blocked version (2) if sufficient workspace. // Requires 1 vector to save diagonal elements, and 2*nb vectors for x and Q*x. // (Compared to dtrevc3, rwork stores 1-norms.) // Zero-out the workspace to avoid potential NaN propagation. nb = 2; if ( lwork >= n + 2*n*nbmin ) { version = 2; nb = (lwork - n) / (2*n); nb = min( nb, nbmax ); nb2 = 1 + 2*nb; lapackf77_claset( "F", &n, &nb2, &c_zero, &c_zero, work, &n ); } else { version = 1; } // Set the constants to control overflow. unfl = lapackf77_slamch( "Safe minimum" ); ovfl = 1. / unfl; lapackf77_slabad( &unfl, &ovfl ); ulp = lapackf77_slamch( "Precision" ); smlnum = unfl*( n / ulp ); // Store the diagonal elements of T in working array work. for( i=0; i < n; ++i ) { *work(i,0) = *T(i,i); } // Compute 1-norm of each column of strictly upper triangular // part of T to control overflow in triangular solver. rwork[0] = 0.; for( j=1; j < n; ++j ) { rwork[j] = cblas_scasum( j, T(0,j), ione ); } // launch threads -- each single-threaded MKL magma_int_t nthread = magma_get_parallel_numthreads(); magma_int_t lapack_nthread = magma_get_lapack_numthreads(); magma_set_lapack_numthreads( 1 ); magma_queue queue; queue.launch( nthread ); //printf( "nthread %d, %d\n", nthread, lapack_nthread ); // NB = N/thread, rounded up to multiple of 16, // but avoid multiples of page size, e.g., 512*8 bytes = 4096. magma_int_t NB = magma_int_t( ceil( ceil( ((float)n) / nthread ) / 16. ) * 16. ); if ( NB % 512 == 0 ) { NB += 32; } magma_timer_t time_total=0, time_trsv=0, time_gemm=0, time_gemv=0, time_trsv_sum=0, time_gemm_sum=0, time_gemv_sum=0; timer_start( time_total ); if ( rightv ) { // ============================================================ // Compute right eigenvectors. // iv is index of column in current block. // Non-blocked version always uses iv=1; // blocked version starts with iv=nb, goes down to 1. // (Note the "0-th" column is used to store the original diagonal.) iv = 1; if ( version == 2 ) { iv = nb; } timer_start( time_trsv ); is = *mout - 1; for( ki=n-1; ki >= 0; --ki ) { if ( somev ) { if ( ! select[ki] ) { continue; } } smin = max( ulp*( MAGMA_C_ABS1( *T(ki,ki) ) ), smlnum ); // -------------------------------------------------------- // Complex right eigenvector *work(ki,iv) = c_one; // Form right-hand side. for( k=0; k < ki; ++k ) { *work(k,iv) = -(*T(k,ki)); } // Solve upper triangular system: // [ T(1:ki-1,1:ki-1) - T(ki,ki) ]*X = scale*work. if ( ki > 0 ) { queue.push_task( new magma_clatrsd_task( MagmaUpper, MagmaNoTrans, MagmaNonUnit, MagmaTrue, ki, T, ldt, *T(ki,ki), work(0,iv), work(ki,iv), rwork )); } // Copy the vector x or Q*x to VR and normalize. if ( ! over ) { // ------------------------------ // no back-transform: copy x to VR and normalize queue.sync(); n2 = ki+1; blasf77_ccopy( &n2, work(0,iv), &ione, VR(0,is), &ione ); ii = blasf77_icamax( &n2, VR(0,is), &ione ) - 1; remax = 1. / MAGMA_C_ABS1( *VR(ii,is) ); blasf77_csscal( &n2, &remax, VR(0,is), &ione ); for( k=ki+1; k < n; ++k ) { *VR(k,is) = c_zero; } } else if ( version == 1 ) { // ------------------------------ // version 1: back-transform each vector with GEMV, Q*x. queue.sync(); time_trsv_sum += timer_stop( time_trsv ); timer_start( time_gemv ); if ( ki > 0 ) { blasf77_cgemv( "n", &n, &ki, &c_one, VR, &ldvr, work(0, iv), &ione, work(ki,iv), VR(0,ki), &ione ); } time_gemv_sum += timer_stop( time_gemv ); ii = blasf77_icamax( &n, VR(0,ki), &ione ) - 1; remax = 1. / MAGMA_C_ABS1( *VR(ii,ki) ); blasf77_csscal( &n, &remax, VR(0,ki), &ione ); timer_start( time_trsv ); } else if ( version == 2 ) { // ------------------------------ // version 2: back-transform block of vectors with GEMM // zero out below vector for( k=ki+1; k < n; ++k ) { *work(k,iv) = c_zero; } // Columns iv:nb of work are valid vectors. // When the number of vectors stored reaches nb, // or if this was last vector, do the GEMM if ( (iv == 1) || (ki == 0) ) { queue.sync(); time_trsv_sum += timer_stop( time_trsv ); timer_start( time_gemm ); nb2 = nb-iv+1; n2 = ki+nb-iv+1; // split gemm into multiple tasks, each doing one block row for( i=0; i < n; i += NB ) { magma_int_t ib = min( NB, n-i ); queue.push_task( new cgemm_task( MagmaNoTrans, MagmaNoTrans, ib, nb2, n2, c_one, VR(i,0), ldvr, work(0,iv ), n, c_zero, work(i,nb+iv), n )); } queue.sync(); time_gemm_sum += timer_stop( time_gemm ); // normalize vectors // TODO if somev, should copy vectors individually to correct location. for( k = iv; k <= nb; ++k ) { ii = blasf77_icamax( &n, work(0,nb+k), &ione ) - 1; remax = 1. / MAGMA_C_ABS1( *work(ii,nb+k) ); blasf77_csscal( &n, &remax, work(0,nb+k), &ione ); } lapackf77_clacpy( "F", &n, &nb2, work(0,nb+iv), &n, VR(0,ki), &ldvr ); iv = nb; timer_start( time_trsv ); } else { iv -= 1; } } // blocked back-transform is -= 1; } } timer_stop( time_trsv ); timer_stop( time_total ); timer_printf( "trevc trsv %.4f, gemm %.4f, gemv %.4f, total %.4f\n", time_trsv_sum, time_gemm_sum, time_gemv_sum, time_total ); if ( leftv ) { // ============================================================ // Compute left eigenvectors. // iv is index of column in current block. // Non-blocked version always uses iv=1; // blocked version starts with iv=1, goes up to nb. // (Note the "0-th" column is used to store the original diagonal.) iv = 1; is = 0; for( ki=0; ki < n; ++ki ) { if ( somev ) { if ( ! select[ki] ) { continue; } } smin = max( ulp*MAGMA_C_ABS1( *T(ki,ki) ), smlnum ); // -------------------------------------------------------- // Complex left eigenvector *work(ki,iv) = c_one; // Form right-hand side. for( k = ki + 1; k < n; ++k ) { *work(k,iv) = -MAGMA_C_CNJG( *T(ki,k) ); } // Solve conjugate-transposed triangular system: // [ T(ki+1:n,ki+1:n) - T(ki,ki) ]**H * X = scale*work. // TODO what happens with T(k,k) - lambda is small? Used to have < smin test. if ( ki < n-1 ) { n2 = n-ki-1; queue.push_task( new magma_clatrsd_task( MagmaUpper, MagmaConjTrans, MagmaNonUnit, MagmaTrue, n2, T(ki+1,ki+1), ldt, *T(ki,ki), work(ki+1,iv), work(ki,iv), rwork )); } // Copy the vector x or Q*x to VL and normalize. if ( ! over ) { // ------------------------------ // no back-transform: copy x to VL and normalize queue.sync(); n2 = n-ki; blasf77_ccopy( &n2, work(ki,iv), &ione, VL(ki,is), &ione ); ii = blasf77_icamax( &n2, VL(ki,is), &ione ) + ki - 1; remax = 1. / MAGMA_C_ABS1( *VL(ii,is) ); blasf77_csscal( &n2, &remax, VL(ki,is), &ione ); for( k=0; k < ki; ++k ) { *VL(k,is) = c_zero; } } else if ( version == 1 ) { // ------------------------------ // version 1: back-transform each vector with GEMV, Q*x. queue.sync(); if ( ki < n-1 ) { n2 = n-ki-1; blasf77_cgemv( "n", &n, &n2, &c_one, VL(0,ki+1), &ldvl, work(ki+1,iv), &ione, work(ki, iv), VL(0,ki), &ione ); } ii = blasf77_icamax( &n, VL(0,ki), &ione ) - 1; remax = 1. / MAGMA_C_ABS1( *VL(ii,ki) ); blasf77_csscal( &n, &remax, VL(0,ki), &ione ); } else if ( version == 2 ) { // ------------------------------ // version 2: back-transform block of vectors with GEMM // zero out above vector // could go from (ki+1)-NV+1 to ki for( k=0; k < ki; ++k ) { *work(k,iv) = c_zero; } // Columns 1:iv of work are valid vectors. // When the number of vectors stored reaches nb, // or if this was last vector, do the GEMM if ( (iv == nb) || (ki == n-1) ) { queue.sync(); n2 = n-(ki+1)+iv; // split gemm into multiple tasks, each doing one block row for( i=0; i < n; i += NB ) { magma_int_t ib = min( NB, n-i ); queue.push_task( new cgemm_task( MagmaNoTrans, MagmaNoTrans, ib, iv, n2, c_one, VL(i,ki-iv+1), ldvl, work(ki-iv+1,1), n, c_zero, work(i,nb+1), n )); } queue.sync(); // normalize vectors for( k=1; k <= iv; ++k ) { ii = blasf77_icamax( &n, work(0,nb+k), &ione ) - 1; remax = 1. / MAGMA_C_ABS1( *work(ii,nb+k) ); blasf77_csscal( &n, &remax, work(0,nb+k), &ione ); } lapackf77_clacpy( "F", &n, &iv, work(0,nb+1), &n, VL(0,ki-iv+1), &ldvl ); iv = 1; } else { iv += 1; } } // blocked back-transform is += 1; } } // close down threads queue.quit(); magma_set_lapack_numthreads( lapack_nthread ); return *info; } // End of CTREVC
float F77_scasum(const int *N, void *X, const int *incX) { return cblas_scasum(*N, X, *incX); }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaFloatComplex *A, *B; float diff, error; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t m, n, k, size, maxn, ld; magmaFloatComplex x2_m, x2_c; // complex x for magma, cblas/fortran blas respectively float x_m, x_c; // x for magma, cblas/fortran blas respectively magma_opts opts; parse_opts( argc, argv, &opts ); opts.tolerance = max( 100., opts.tolerance ); float tol = opts.tolerance * lapackf77_slamch("E"); gTol = tol; printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "\n" "Diff compares MAGMA wrapper to CBLAS and BLAS function; should be exactly 0.\n" "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n" "\n" ); float total_diff = 0.; float total_error = 0.; int inc[] = { 1 }; //{ -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; int ninc = sizeof(inc)/sizeof(*inc); for( int itest = 0; itest < opts.ntest; ++itest ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; printf("=========================================================================\n"); printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n", (int) m, (int) n, (int) k, (int) incx, (int) incy ); printf( "Function MAGMA CBLAS BLAS Diff Error\n" " msec msec msec\n" ); // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) ); ld = max( 1, maxn ); size = ld*maxn; magma_cmalloc_pinned( &A, size ); assert( A != NULL ); magma_cmalloc_pinned( &B, size ); assert( B != NULL ); // initialize matrices lapackf77_clarnv( &ione, ISEED, &size, A ); lapackf77_clarnv( &ione, ISEED, &size, B ); printf( "Level 1 BLAS ----------------------------------------------------------\n" ); // ----- test SCASUM // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_scasum( m, A(0,j), incx ); x_c = cblas_scasum( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_scasum( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "scasum", diff, error ); total_diff += diff; total_error += error; } // ----- test SCNRM2 // get two-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_scnrm2( m, A(0,j), incx ); x_c = cblas_scnrm2( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_scnrm2( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "scnrm2", diff, error ); total_diff += diff; total_error += error; } // ----- test CDOTC // dot columns, Aj^H Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_cdotc( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_cdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_cdotc( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_cdotc( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "cdotc", diff, error ); total_diff += diff; total_error += error; total_error += error; // ----- test CDOTU // dot columns, Aj^T * Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_cdotu( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_cdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_cdotu( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_cdotu( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "cdotu", diff, error ); total_diff += diff; total_error += error; // tell user about disabled functions #if defined( MAGMA_WITH_MKL ) printf( "cblas_cdotc and cblas_cdotu disabled with MKL (segfaults)\n" ); #endif #if defined( __APPLE__ ) printf( "blasf77_cdotc and blasf77_cdotu disabled on MacOS (segfaults)\n" ); #endif // cleanup magma_free_pinned( A ); magma_free_pinned( B ); fflush( stdout ); }}} // itest, incx, incy // TODO use average error? printf( "sum diffs = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n" "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n", total_diff, total_error ); if ( total_diff != 0. ) { printf( "some tests failed diff == 0.; see above.\n" ); } else { printf( "all tests passed diff == 0.\n" ); } TESTING_FINALIZE(); int status = (total_diff != 0.); return status; }
magma_int_t magma_clatrsd( magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_bool_t normin, magma_int_t n, const magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex lambda, magmaFloatComplex *x, float *scale, float *cnorm, magma_int_t *info) { #define A(i,j) (A + (i) + (j)*lda) /* constants */ const magma_int_t ione = 1; const float d_half = 0.5; const magmaFloatComplex c_zero = MAGMA_C_ZERO; const magmaFloatComplex c_one = MAGMA_C_ONE; /* System generated locals */ magma_int_t len; magmaFloatComplex ztmp; /* Local variables */ magma_int_t i, j; float xj, rec, tjj; magma_int_t jinc; float xbnd; magma_int_t imax; float tmax; magmaFloatComplex tjjs; float xmax, grow; float tscal; magmaFloatComplex uscal; magma_int_t jlast; magmaFloatComplex csumj; float bignum; magma_int_t jfirst; float smlnum; /* Function Body */ *info = 0; magma_int_t upper = (uplo == MagmaUpper); magma_int_t notran = (trans == MagmaNoTrans); magma_int_t nounit = (diag == MagmaNonUnit); /* Test the input parameters. */ if ( ! upper && uplo != MagmaLower ) { *info = -1; } else if (! notran && trans != MagmaTrans && trans != MagmaConjTrans) { *info = -2; } else if ( ! nounit && diag != MagmaUnit ) { *info = -3; } else if ( ! (normin == MagmaTrue) && ! (normin == MagmaFalse) ) { *info = -4; } else if ( n < 0 ) { *info = -5; } else if ( lda < max(1,n) ) { *info = -7; } if ( *info != 0 ) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if ( n == 0 ) { return *info; } /* Determine machine dependent parameters to control overflow. */ smlnum = lapackf77_slamch( "Safe minimum" ); bignum = 1. / smlnum; lapackf77_slabad( &smlnum, &bignum ); smlnum /= lapackf77_slamch( "Precision" ); bignum = 1. / smlnum; *scale = 1.; if ( normin == MagmaFalse ) { /* Compute the 1-norm of each column, not including the diagonal. */ if ( upper ) { /* A is upper triangular. */ cnorm[0] = 0.; for( j = 1; j < n; ++j ) { cnorm[j] = cblas_scasum( j, A(0,j), ione ); } } else { /* A is lower triangular. */ for( j = 0; j < n-1; ++j ) { cnorm[j] = cblas_scasum( n-(j+1), A(j+1,j), ione ); } cnorm[n-1] = 0.; } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM/2. */ imax = blasf77_isamax( &n, &cnorm[0], &ione ) - 1; tmax = cnorm[imax]; if ( tmax <= bignum * 0.5 ) { tscal = 1.; } else { tscal = 0.5 / (smlnum * tmax); blasf77_sscal( &n, &tscal, &cnorm[0], &ione ); } /* ================================================================= */ /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine CTRSV can be used. */ xmax = 0.; for( j = 0; j < n; ++j ) { xmax = max( xmax, 0.5*MAGMA_C_ABS1( x[j] )); } xbnd = xmax; if ( notran ) { /* ---------------------------------------- */ /* Compute the growth in A * x = b. */ if ( upper ) { jfirst = n-1; jlast = 0; jinc = -1; } else { jfirst = 0; jlast = n; jinc = 1; } if ( tscal != 1. ) { grow = 0.; goto L60; } /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 0.5 / max( xbnd, smlnum ); xbnd = grow; for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Exit the loop if the growth factor is too small. */ if ( grow <= smlnum ) { goto L60; } if ( nounit ) { tjjs = *A(j,j) - lambda; } else { tjjs = c_one - lambda; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj >= smlnum ) { /* M(j) = G(j-1) / abs(A(j,j)) */ xbnd = min( xbnd, min(1.,tjj)*grow ); } else { /* M(j) could overflow, set XBND to 0. */ xbnd = 0.; } if ( tjj + cnorm[j] >= smlnum ) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= (tjj / (tjj + cnorm[j])); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } } grow = xbnd; L60: ; } else { /* ---------------------------------------- */ /* Compute the growth in A**T * x = b or A**H * x = b. */ if ( upper ) { jfirst = 0; jlast = n; jinc = 1; } else { jfirst = n-1; jlast = 0; jinc = -1; } if ( tscal != 1. ) { grow = 0.; goto L90; } /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 0.5 / max( xbnd, smlnum ); xbnd = grow; for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Exit the loop if the growth factor is too small. */ if ( grow <= smlnum ) { goto L90; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = 1. + cnorm[j]; grow = min( grow, xbnd / xj ); if ( nounit ) { tjjs = *A(j,j) - lambda; } else { tjjs = c_one - lambda; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj >= smlnum ) { /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ if ( xj > tjj ) { xbnd *= (tjj / xj); } } else { /* M(j) could overflow, set XBND to 0. */ xbnd = 0.; } } grow = min( grow, xbnd ); L90: ; } /* ================================================================= */ /* Due to modified diagonal, we can't use regular BLAS ctrsv. */ /* Use a Level 1 BLAS solve, scaling intermediate results. */ if ( xmax > bignum * 0.5 ) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = (bignum * 0.5) / xmax; blasf77_csscal( &n, scale, &x[0], &ione ); xmax = bignum; } else { xmax *= 2.; } if ( notran ) { /* ---------------------------------------- */ /* Solve A * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = (*A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L110; } } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale x by 1/b(j). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; xj = MAGMA_C_ABS1( x[j] ); } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = (tjj * bignum) / xj; if ( cnorm[j] > 1. ) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; xj = MAGMA_C_ABS1( x[j] ); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; xj = 1.; *scale = 0.; xmax = 0.; } L110: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if ( xj > 1. ) { rec = 1. / xj; if ( cnorm[j] > (bignum - xmax) * rec ) { /* Scale x by 1/(2*abs(x(j))). */ rec *= 0.5; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; } } else if ( xj * cnorm[j] > bignum - xmax ) { /* Scale x by 1/2. */ blasf77_csscal( &n, &d_half, &x[0], &ione ); *scale *= 0.5; } if ( upper ) { if ( j > 0 ) { /* Compute the update */ /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ len = j; ztmp = -tscal * x[j]; blasf77_caxpy( &len, &ztmp, A(0,j), &ione, &x[0], &ione ); i = blasf77_icamax( &len, &x[0], &ione ) - 1; xmax = MAGMA_C_ABS1( x[i] ); } } else { if ( j < n-1 ) { /* Compute the update */ /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ len = n - (j+1); ztmp = -tscal * x[j]; blasf77_caxpy( &len, &ztmp, A(j+1,j), &ione, &x[j + 1], &ione ); i = j + blasf77_icamax( &len, &x[j + 1], &ione ); xmax = MAGMA_C_ABS1( x[i] ); } } } } else if ( trans == MagmaTrans ) { /* ---------------------------------------- */ /* Solve A**T * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = MAGMA_C_ABS1( x[j] ); uscal = MAGMA_C_MAKE( tscal, 0. ); rec = 1. / max( xmax, 1. ); if ( cnorm[j] > (bignum - xj) * rec ) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= 0.5; if ( nounit ) { tjjs = (*A(j,j) - lambda) * tscal; } else { tjjs = (c_one - lambda) * tscal; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > 1. ) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ rec = min( 1., rec * tjj ); uscal = uscal / tjjs; } if ( rec < 1. ) { blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } csumj = c_zero; if ( uscal == c_one ) { /* If the scaling needed for A in the dot product is 1, */ /* call ZDOTU to perform the dot product. */ if ( upper ) { cblas_cdotu_sub( j, A(0,j), ione, &x[0], ione, &csumj ); } else if ( j < n-1 ) { cblas_cdotu_sub( n-(j+1), A(j+1,j), ione, &x[j + 1], ione, &csumj ); } } else { /* Otherwise, use in-line code for the dot product. */ if ( upper ) { for( i = 0; i < j; ++i ) { csumj += (*A(i,j) * uscal) * x[i]; } } else if ( j < n-1 ) { for( i = j+1; i < n; ++i ) { csumj += (*A(i,j) * uscal) * x[i]; } } } if ( uscal == MAGMA_C_MAKE( tscal, 0. )) { /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= csumj; xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = (*A(j,j) - lambda) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L160; } } /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = (tjj * bignum) / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0 and compute a solution to A**T *x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; *scale = 0.; xmax = 0.; } L160: ; } else { /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = (x[j] / tjjs) - csumj; } xmax = max( xmax, MAGMA_C_ABS1( x[j] )); } } else { /* ---------------------------------------- */ /* Solve A**H * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = MAGMA_C_ABS1( x[j] ); uscal = MAGMA_C_MAKE( tscal, 0. ); rec = 1. / max(xmax, 1.); if ( cnorm[j] > (bignum - xj) * rec ) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= 0.5; if ( nounit ) { tjjs = MAGMA_C_CNJG( *A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > 1. ) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ rec = min( 1., rec * tjj ); uscal = uscal / tjjs; } if ( rec < 1. ) { blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } csumj = c_zero; if ( uscal == c_one ) { /* If the scaling needed for A in the dot product is 1, */ /* call CDOTC to perform the dot product. */ if ( upper ) { cblas_cdotc_sub( j, A(0,j), ione, &x[0], ione, &csumj ); } else if ( j < n-1 ) { cblas_cdotc_sub( n-(j+1), A(j+1,j), ione, &x[j + 1], ione, &csumj ); } } else { /* Otherwise, use in-line code for the dot product. */ if ( upper ) { for( i = 0; i < j; ++i ) { csumj += (MAGMA_C_CNJG( *A(i,j) ) * uscal) * x[i]; } } else if ( j < n-1 ) { for( i = j + 1; i < n; ++i ) { csumj += (MAGMA_C_CNJG( *A(i,j) ) * uscal) * x[i]; } } } if ( uscal == tscal ) { /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= csumj; xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = MAGMA_C_CNJG( *A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L210; } } /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = (tjj * bignum) / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0 and compute a solution to A**H *x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; *scale = 0.; xmax = 0.; } L210: ; } else { /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = (x[j] / tjjs) - csumj; } xmax = max( xmax, MAGMA_C_ABS1( x[j] )); } } *scale /= tscal; /* Scale the column norms by 1/TSCAL for return. */ if ( tscal != 1. ) { float d = 1. / tscal; blasf77_sscal( &n, &d, &cnorm[0], &ione ); } return *info; } /* end clatrsd */