void bli_cdot( conj_t conj, int n, scomplex* x, int incx, scomplex* y, int incy, scomplex* rho ) { #ifdef BLIS_ENABLE_CBLAS_INTERFACES if ( bli_is_conj( conj ) ) { cblas_cdotc_sub( n, x, incx, y, incy, rho ); } else // if ( !bli_is_conj( conj ) ) { cblas_cdotu_sub( n, x, incx, y, incy, rho ); } #else bli_cdot_in( conj, n, x, incx, y, incy, rho ); #endif }
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cdotxx (JNIEnv * env, jobject calling_obj, jint N, jfloatArray jX, jint startX, jfloatArray jY, jint startY, jfloatArray jZ){ jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); jfloat * Z = (*env)->GetPrimitiveArrayCritical(env, jZ, JNI_FALSE); cblas_cdotu_sub(N, X+startX, 1, Y+startY, 1, Z); (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); (*env)->ReleasePrimitiveArrayCritical(env, jZ, Z, 0); (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); }
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_cdotm (JNIEnv * env, jobject calling_obj, jint nrows, jint ncols, jfloatArray jX, jint ldx, jfloatArray jY, jint ldy, jfloatArray jZ){ jfloat * X = (*env)->GetPrimitiveArrayCritical(env, jX, JNI_FALSE); jfloat * Y = (*env)->GetPrimitiveArrayCritical(env, jY, JNI_FALSE); jfloat * Z = (*env)->GetPrimitiveArrayCritical(env, jZ, JNI_FALSE); int i; for (i=0; i<2*ncols; i+=2) { cblas_cdotu_sub(nrows, X+i*ldx, 1, Y+i*ldy, 1, Z+i); } (*env)->ReleasePrimitiveArrayCritical(env, jY, Y, 0); (*env)->ReleasePrimitiveArrayCritical(env, jZ, Z, 0); (*env)->ReleasePrimitiveArrayCritical(env, jX, X, 0); }
static void CFLOAT_dot(void *a, intp stridea, void *b, intp strideb, void *res, intp n, void *tmp) { register int na = stridea / sizeof(cfloat); register int nb = strideb / sizeof(cfloat); if ((sizeof(cfloat) * na == stridea) && (sizeof(cfloat) * nb == strideb) && (na >= 0) && (nb >= 0)) cblas_cdotu_sub((int)n, (float *)a, na, (float *)b, nb, (float *)res); else oldFunctions[PyArray_CFLOAT](a, stridea, b, strideb, res, n, tmp); }
void phi_dotu_sub(const int N, const Complex *X, const int incX, const Complex *Y, const int incY, Complex *dotu){ #ifndef NOBLAS #ifdef SINGLEPRECISION cblas_cdotu_sub(N,X,incX,Y,incY,dotu); #else cblas_zdotu_sub(N,X,incX,Y,incY,dotu); #endif #else int i; *dotu = 0; for(i = 0; i < N; ++i, X+=incX, Y+=incY){ *dotu += (*X)*(*Y); } #endif }
std::complex<float> HostVector<std::complex<float> >::DotNonConj(const BaseVector<std::complex<float> > &x) const { assert(&x != NULL); const HostVector<std::complex<float> > *cast_x = dynamic_cast<const HostVector<std::complex<float> >*> (&x); assert(cast_x != NULL); assert(this->size_ == cast_x->size_); std::complex<float> res; cblas_cdotu_sub(this->size_, this->vec_, 1, cast_x->vec_, 1, &res); return res; }
void test_dot (void) { const double flteps = 1e-4, dbleps = 1e-6; { int N = 1; float alpha = 0.0f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 0.604725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 1)"); }; { int N = 1; float alpha = 0.1f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 0.704725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 2)"); }; { int N = 1; float alpha = 1.0f; float X[] = { 0.733f }; float Y[] = { 0.825f }; int incX = 1; int incY = -1; float expected = 1.604725f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 3)"); }; { int N = 1; float alpha = 0.0f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 0.541604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 4)"); }; { int N = 1; float alpha = 0.1f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 0.641604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 5)"); }; { int N = 1; float alpha = 1.0f; float X[] = { -0.812f }; float Y[] = { -0.667f }; int incX = -1; int incY = 1; float expected = 1.541604f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 6)"); }; { int N = 1; float alpha = 0.0f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 0.251563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 7)"); }; { int N = 1; float alpha = 0.1f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 0.351563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 8)"); }; { int N = 1; float alpha = 1.0f; float X[] = { 0.481f }; float Y[] = { 0.523f }; int incX = -1; int incY = -1; float expected = 1.251563f; float f; f = cblas_sdsdot (N, alpha, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdsdot(case 9)"); }; { int N = 1; float X[] = { 0.785f }; float Y[] = { -0.7f }; int incX = 1; int incY = -1; float expected = -0.5495f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 10)"); }; { int N = 1; double X[] = { 0.79 }; double Y[] = { -0.679 }; int incX = 1; int incY = -1; double expected = -0.53641; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 11)"); }; { int N = 1; float X[] = { 0.474f, -0.27f }; float Y[] = { -0.144f, -0.392f }; int incX = 1; int incY = -1; float expected[2] = {-0.174096f, -0.146928f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 12) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 12) imag"); }; { int N = 1; float X[] = { 0.474f, -0.27f }; float Y[] = { -0.144f, -0.392f }; int incX = 1; int incY = -1; float expected[2] = {0.037584f, -0.224688f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 13) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 13) imag"); }; { int N = 1; double X[] = { -0.87, -0.631 }; double Y[] = { -0.7, -0.224 }; int incX = 1; int incY = -1; double expected[2] = {0.467656, 0.63658}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 14) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 14) imag"); }; { int N = 1; double X[] = { -0.87, -0.631 }; double Y[] = { -0.7, -0.224 }; int incX = 1; int incY = -1; double expected[2] = {0.750344, -0.24682}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 15) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 15) imag"); }; { int N = 1; float X[] = { -0.457f }; float Y[] = { 0.839f }; int incX = -1; int incY = 1; float expected = -0.383423f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 16)"); }; { int N = 1; double X[] = { 0.949 }; double Y[] = { -0.873 }; int incX = -1; int incY = 1; double expected = -0.828477; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 17)"); }; { int N = 1; float X[] = { 0.852f, -0.045f }; float Y[] = { 0.626f, -0.164f }; int incX = -1; int incY = 1; float expected[2] = {0.525972f, -0.167898f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 18) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 18) imag"); }; { int N = 1; float X[] = { 0.852f, -0.045f }; float Y[] = { 0.626f, -0.164f }; int incX = -1; int incY = 1; float expected[2] = {0.540732f, -0.111558f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 19) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 19) imag"); }; { int N = 1; double X[] = { -0.786, -0.341 }; double Y[] = { -0.271, -0.896 }; int incX = -1; int incY = 1; double expected[2] = {-0.09253, 0.796667}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 20) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 20) imag"); }; { int N = 1; double X[] = { -0.786, -0.341 }; double Y[] = { -0.271, -0.896 }; int incX = -1; int incY = 1; double expected[2] = {0.518542, 0.611845}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 21) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 21) imag"); }; { int N = 1; float X[] = { -0.088f }; float Y[] = { -0.165f }; int incX = -1; int incY = -1; float expected = 0.01452f; float f; f = cblas_sdot(N, X, incX, Y, incY); gsl_test_rel(f, expected, flteps, "sdot(case 22)"); }; { int N = 1; double X[] = { -0.434 }; double Y[] = { -0.402 }; int incX = -1; int incY = -1; double expected = 0.174468; double f; f = cblas_ddot(N, X, incX, Y, incY); gsl_test_rel(f, expected, dbleps, "ddot(case 23)"); }; { int N = 1; float X[] = { -0.347f, 0.899f }; float Y[] = { -0.113f, -0.858f }; int incX = -1; int incY = -1; float expected[2] = {0.810553f, 0.196139f}; float f[2]; cblas_cdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 24) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 24) imag"); }; { int N = 1; float X[] = { -0.347f, 0.899f }; float Y[] = { -0.113f, -0.858f }; int incX = -1; int incY = -1; float expected[2] = {-0.732131f, 0.399313f}; float f[2]; cblas_cdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 25) real"); gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 25) imag"); }; { int N = 1; double X[] = { -0.897, -0.204 }; double Y[] = { -0.759, 0.557 }; int incX = -1; int incY = -1; double expected[2] = {0.794451, -0.344793}; double f[2]; cblas_zdotu_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 26) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 26) imag"); }; { int N = 1; double X[] = { -0.897, -0.204 }; double Y[] = { -0.759, 0.557 }; int incX = -1; int incY = -1; double expected[2] = {0.567195, -0.654465}; double f[2]; cblas_zdotc_sub(N, X, incX, Y, incY, &f); gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 27) real"); gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 27) imag"); }; }
void wrapper_cblas_cdotu_sub(const int N, const void *X, const int incX, const void *Y, const int incY, void *dotu) { cblas_cdotu_sub(N, X, incX, Y, incY, dotu); }
void WRAP_F77(acc_cdotu_sub)(const int *N, const void *X, const int *incX, const void *Y, const int *incY, void *dotu) { cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu); }
complex float cdotu_(int *N, void *CX, int *INCX, void *CY, int *INCY) { complex float dotu; cblas_cdotu_sub(*N, CX, *INCX, CY, *INCY, &dotu); return dotu; }
void F77_cdotu(const int *N, void *X, const int *incX, void *Y, const int *incY,void *dotu) { cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu); return; }
// // Overloaded function for dispatching to // * CBLAS backend, and // * complex<float> value-type. // inline std::complex<float> dot( const int n, const std::complex<float>* x, const int incx, const std::complex<float>* y, const int incy ) { std::complex<float> result; cblas_cdotu_sub( n, x, incx, y, incy, &result ); return result; }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaFloatComplex *A, *B; float diff, error; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t m, n, k, size, maxn, ld; magmaFloatComplex x2_m, x2_c; // complex x for magma, cblas/fortran blas respectively float x_m, x_c; // x for magma, cblas/fortran blas respectively magma_opts opts; parse_opts( argc, argv, &opts ); opts.tolerance = max( 100., opts.tolerance ); float tol = opts.tolerance * lapackf77_slamch("E"); gTol = tol; printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "\n" "Diff compares MAGMA wrapper to CBLAS and BLAS function; should be exactly 0.\n" "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n" "\n" ); float total_diff = 0.; float total_error = 0.; int inc[] = { 1 }; //{ -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; int ninc = sizeof(inc)/sizeof(*inc); for( int itest = 0; itest < opts.ntest; ++itest ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; printf("=========================================================================\n"); printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n", (int) m, (int) n, (int) k, (int) incx, (int) incy ); printf( "Function MAGMA CBLAS BLAS Diff Error\n" " msec msec msec\n" ); // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) ); ld = max( 1, maxn ); size = ld*maxn; magma_cmalloc_pinned( &A, size ); assert( A != NULL ); magma_cmalloc_pinned( &B, size ); assert( B != NULL ); // initialize matrices lapackf77_clarnv( &ione, ISEED, &size, A ); lapackf77_clarnv( &ione, ISEED, &size, B ); printf( "Level 1 BLAS ----------------------------------------------------------\n" ); // ----- test SCASUM // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_scasum( m, A(0,j), incx ); x_c = cblas_scasum( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_scasum( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "scasum", diff, error ); total_diff += diff; total_error += error; } // ----- test SCNRM2 // get two-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_scnrm2( m, A(0,j), incx ); x_c = cblas_scnrm2( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_scnrm2( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "scnrm2", diff, error ); total_diff += diff; total_error += error; } // ----- test CDOTC // dot columns, Aj^H Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_cdotc( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_cdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_cdotc( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_cdotc( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "cdotc", diff, error ); total_diff += diff; total_error += error; total_error += error; // ----- test CDOTU // dot columns, Aj^T * Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_cdotu( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_cdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_cdotu( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_cdotu( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "cdotu", diff, error ); total_diff += diff; total_error += error; // tell user about disabled functions #if defined( MAGMA_WITH_MKL ) printf( "cblas_cdotc and cblas_cdotu disabled with MKL (segfaults)\n" ); #endif #if defined( __APPLE__ ) printf( "blasf77_cdotc and blasf77_cdotu disabled on MacOS (segfaults)\n" ); #endif // cleanup magma_free_pinned( A ); magma_free_pinned( B ); fflush( stdout ); }}} // itest, incx, incy // TODO use average error? printf( "sum diffs = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n" "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n", total_diff, total_error ); if ( total_diff != 0. ) { printf( "some tests failed diff == 0.; see above.\n" ); } else { printf( "all tests passed diff == 0.\n" ); } TESTING_FINALIZE(); int status = (total_diff != 0.); return status; }
magma_int_t magma_clatrsd( magma_uplo_t uplo, magma_trans_t trans, magma_diag_t diag, magma_bool_t normin, magma_int_t n, const magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex lambda, magmaFloatComplex *x, float *scale, float *cnorm, magma_int_t *info) { #define A(i,j) (A + (i) + (j)*lda) /* constants */ const magma_int_t ione = 1; const float d_half = 0.5; const magmaFloatComplex c_zero = MAGMA_C_ZERO; const magmaFloatComplex c_one = MAGMA_C_ONE; /* System generated locals */ magma_int_t len; magmaFloatComplex ztmp; /* Local variables */ magma_int_t i, j; float xj, rec, tjj; magma_int_t jinc; float xbnd; magma_int_t imax; float tmax; magmaFloatComplex tjjs; float xmax, grow; float tscal; magmaFloatComplex uscal; magma_int_t jlast; magmaFloatComplex csumj; float bignum; magma_int_t jfirst; float smlnum; /* Function Body */ *info = 0; magma_int_t upper = (uplo == MagmaUpper); magma_int_t notran = (trans == MagmaNoTrans); magma_int_t nounit = (diag == MagmaNonUnit); /* Test the input parameters. */ if ( ! upper && uplo != MagmaLower ) { *info = -1; } else if (! notran && trans != MagmaTrans && trans != MagmaConjTrans) { *info = -2; } else if ( ! nounit && diag != MagmaUnit ) { *info = -3; } else if ( ! (normin == MagmaTrue) && ! (normin == MagmaFalse) ) { *info = -4; } else if ( n < 0 ) { *info = -5; } else if ( lda < max(1,n) ) { *info = -7; } if ( *info != 0 ) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if ( n == 0 ) { return *info; } /* Determine machine dependent parameters to control overflow. */ smlnum = lapackf77_slamch( "Safe minimum" ); bignum = 1. / smlnum; lapackf77_slabad( &smlnum, &bignum ); smlnum /= lapackf77_slamch( "Precision" ); bignum = 1. / smlnum; *scale = 1.; if ( normin == MagmaFalse ) { /* Compute the 1-norm of each column, not including the diagonal. */ if ( upper ) { /* A is upper triangular. */ cnorm[0] = 0.; for( j = 1; j < n; ++j ) { cnorm[j] = cblas_scasum( j, A(0,j), ione ); } } else { /* A is lower triangular. */ for( j = 0; j < n-1; ++j ) { cnorm[j] = cblas_scasum( n-(j+1), A(j+1,j), ione ); } cnorm[n-1] = 0.; } } /* Scale the column norms by TSCAL if the maximum element in CNORM is */ /* greater than BIGNUM/2. */ imax = blasf77_isamax( &n, &cnorm[0], &ione ) - 1; tmax = cnorm[imax]; if ( tmax <= bignum * 0.5 ) { tscal = 1.; } else { tscal = 0.5 / (smlnum * tmax); blasf77_sscal( &n, &tscal, &cnorm[0], &ione ); } /* ================================================================= */ /* Compute a bound on the computed solution vector to see if the */ /* Level 2 BLAS routine CTRSV can be used. */ xmax = 0.; for( j = 0; j < n; ++j ) { xmax = max( xmax, 0.5*MAGMA_C_ABS1( x[j] )); } xbnd = xmax; if ( notran ) { /* ---------------------------------------- */ /* Compute the growth in A * x = b. */ if ( upper ) { jfirst = n-1; jlast = 0; jinc = -1; } else { jfirst = 0; jlast = n; jinc = 1; } if ( tscal != 1. ) { grow = 0.; goto L60; } /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, G(0) = max{x(i), i=1,...,n}. */ grow = 0.5 / max( xbnd, smlnum ); xbnd = grow; for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Exit the loop if the growth factor is too small. */ if ( grow <= smlnum ) { goto L60; } if ( nounit ) { tjjs = *A(j,j) - lambda; } else { tjjs = c_one - lambda; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj >= smlnum ) { /* M(j) = G(j-1) / abs(A(j,j)) */ xbnd = min( xbnd, min(1.,tjj)*grow ); } else { /* M(j) could overflow, set XBND to 0. */ xbnd = 0.; } if ( tjj + cnorm[j] >= smlnum ) { /* G(j) = G(j-1)*( 1 + CNORM(j) / abs(A(j,j)) ) */ grow *= (tjj / (tjj + cnorm[j])); } else { /* G(j) could overflow, set GROW to 0. */ grow = 0.; } } grow = xbnd; L60: ; } else { /* ---------------------------------------- */ /* Compute the growth in A**T * x = b or A**H * x = b. */ if ( upper ) { jfirst = 0; jlast = n; jinc = 1; } else { jfirst = n-1; jlast = 0; jinc = -1; } if ( tscal != 1. ) { grow = 0.; goto L90; } /* A is non-unit triangular. */ /* Compute GROW = 1/G(j) and XBND = 1/M(j). */ /* Initially, M(0) = max{x(i), i=1,...,n}. */ grow = 0.5 / max( xbnd, smlnum ); xbnd = grow; for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Exit the loop if the growth factor is too small. */ if ( grow <= smlnum ) { goto L90; } /* G(j) = max( G(j-1), M(j-1)*( 1 + CNORM(j) ) ) */ xj = 1. + cnorm[j]; grow = min( grow, xbnd / xj ); if ( nounit ) { tjjs = *A(j,j) - lambda; } else { tjjs = c_one - lambda; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj >= smlnum ) { /* M(j) = M(j-1)*( 1 + CNORM(j) ) / abs(A(j,j)) */ if ( xj > tjj ) { xbnd *= (tjj / xj); } } else { /* M(j) could overflow, set XBND to 0. */ xbnd = 0.; } } grow = min( grow, xbnd ); L90: ; } /* ================================================================= */ /* Due to modified diagonal, we can't use regular BLAS ctrsv. */ /* Use a Level 1 BLAS solve, scaling intermediate results. */ if ( xmax > bignum * 0.5 ) { /* Scale X so that its components are less than or equal to */ /* BIGNUM in absolute value. */ *scale = (bignum * 0.5) / xmax; blasf77_csscal( &n, scale, &x[0], &ione ); xmax = bignum; } else { xmax *= 2.; } if ( notran ) { /* ---------------------------------------- */ /* Solve A * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) / A(j,j), scaling x if necessary. */ xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = (*A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L110; } } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale x by 1/b(j). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; xj = MAGMA_C_ABS1( x[j] ); } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM */ /* to avoid overflow when dividing by A(j,j). */ rec = (tjj * bignum) / xj; if ( cnorm[j] > 1. ) { /* Scale by 1/CNORM(j) to avoid overflow when */ /* multiplying x(j) times column j. */ rec /= cnorm[j]; } blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; xj = MAGMA_C_ABS1( x[j] ); } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0, and compute a solution to A*x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; xj = 1.; *scale = 0.; xmax = 0.; } L110: /* Scale x if necessary to avoid overflow when adding a */ /* multiple of column j of A. */ if ( xj > 1. ) { rec = 1. / xj; if ( cnorm[j] > (bignum - xmax) * rec ) { /* Scale x by 1/(2*abs(x(j))). */ rec *= 0.5; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; } } else if ( xj * cnorm[j] > bignum - xmax ) { /* Scale x by 1/2. */ blasf77_csscal( &n, &d_half, &x[0], &ione ); *scale *= 0.5; } if ( upper ) { if ( j > 0 ) { /* Compute the update */ /* x(1:j-1) := x(1:j-1) - x(j) * A(1:j-1,j) */ len = j; ztmp = -tscal * x[j]; blasf77_caxpy( &len, &ztmp, A(0,j), &ione, &x[0], &ione ); i = blasf77_icamax( &len, &x[0], &ione ) - 1; xmax = MAGMA_C_ABS1( x[i] ); } } else { if ( j < n-1 ) { /* Compute the update */ /* x(j+1:n) := x(j+1:n) - x(j) * A(j+1:n,j) */ len = n - (j+1); ztmp = -tscal * x[j]; blasf77_caxpy( &len, &ztmp, A(j+1,j), &ione, &x[j + 1], &ione ); i = j + blasf77_icamax( &len, &x[j + 1], &ione ); xmax = MAGMA_C_ABS1( x[i] ); } } } } else if ( trans == MagmaTrans ) { /* ---------------------------------------- */ /* Solve A**T * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = MAGMA_C_ABS1( x[j] ); uscal = MAGMA_C_MAKE( tscal, 0. ); rec = 1. / max( xmax, 1. ); if ( cnorm[j] > (bignum - xj) * rec ) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= 0.5; if ( nounit ) { tjjs = (*A(j,j) - lambda) * tscal; } else { tjjs = (c_one - lambda) * tscal; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > 1. ) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ rec = min( 1., rec * tjj ); uscal = uscal / tjjs; } if ( rec < 1. ) { blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } csumj = c_zero; if ( uscal == c_one ) { /* If the scaling needed for A in the dot product is 1, */ /* call ZDOTU to perform the dot product. */ if ( upper ) { cblas_cdotu_sub( j, A(0,j), ione, &x[0], ione, &csumj ); } else if ( j < n-1 ) { cblas_cdotu_sub( n-(j+1), A(j+1,j), ione, &x[j + 1], ione, &csumj ); } } else { /* Otherwise, use in-line code for the dot product. */ if ( upper ) { for( i = 0; i < j; ++i ) { csumj += (*A(i,j) * uscal) * x[i]; } } else if ( j < n-1 ) { for( i = j+1; i < n; ++i ) { csumj += (*A(i,j) * uscal) * x[i]; } } } if ( uscal == MAGMA_C_MAKE( tscal, 0. )) { /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= csumj; xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = (*A(j,j) - lambda) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L160; } } /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = (tjj * bignum) / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0 and compute a solution to A**T *x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; *scale = 0.; xmax = 0.; } L160: ; } else { /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = (x[j] / tjjs) - csumj; } xmax = max( xmax, MAGMA_C_ABS1( x[j] )); } } else { /* ---------------------------------------- */ /* Solve A**H * x = b */ for( j = jfirst; (jinc < 0 ? j >= jlast : j < jlast); j += jinc ) { /* Compute x(j) = b(j) - sum A(k,j)*x(k). */ /* k<>j */ xj = MAGMA_C_ABS1( x[j] ); uscal = MAGMA_C_MAKE( tscal, 0. ); rec = 1. / max(xmax, 1.); if ( cnorm[j] > (bignum - xj) * rec ) { /* If x(j) could overflow, scale x by 1/(2*XMAX). */ rec *= 0.5; if ( nounit ) { tjjs = MAGMA_C_CNJG( *A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; } tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > 1. ) { /* Divide by A(j,j) when scaling x if A(j,j) > 1. */ rec = min( 1., rec * tjj ); uscal = uscal / tjjs; } if ( rec < 1. ) { blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } csumj = c_zero; if ( uscal == c_one ) { /* If the scaling needed for A in the dot product is 1, */ /* call CDOTC to perform the dot product. */ if ( upper ) { cblas_cdotc_sub( j, A(0,j), ione, &x[0], ione, &csumj ); } else if ( j < n-1 ) { cblas_cdotc_sub( n-(j+1), A(j+1,j), ione, &x[j + 1], ione, &csumj ); } } else { /* Otherwise, use in-line code for the dot product. */ if ( upper ) { for( i = 0; i < j; ++i ) { csumj += (MAGMA_C_CNJG( *A(i,j) ) * uscal) * x[i]; } } else if ( j < n-1 ) { for( i = j + 1; i < n; ++i ) { csumj += (MAGMA_C_CNJG( *A(i,j) ) * uscal) * x[i]; } } } if ( uscal == tscal ) { /* Compute x(j) := ( x(j) - CSUMJ ) / A(j,j) if 1/A(j,j) */ /* was not used to scale the dotproduct. */ x[j] -= csumj; xj = MAGMA_C_ABS1( x[j] ); if ( nounit ) { tjjs = MAGMA_C_CNJG( *A(j,j) - lambda ) * tscal; } else { tjjs = (c_one - lambda) * tscal; if ( tscal == 1. ) { goto L210; } } /* Compute x(j) = x(j) / A(j,j), scaling if necessary. */ tjj = MAGMA_C_ABS1( tjjs ); if ( tjj > smlnum ) { /* abs(A(j,j)) > SMLNUM: */ if ( tjj < 1. ) { if ( xj > tjj * bignum ) { /* Scale X by 1/abs(x(j)). */ rec = 1. / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } } x[j] = x[j] / tjjs; } else if ( tjj > 0. ) { /* 0 < abs(A(j,j)) <= SMLNUM: */ if ( xj > tjj * bignum ) { /* Scale x by (1/abs(x(j)))*abs(A(j,j))*BIGNUM. */ rec = (tjj * bignum) / xj; blasf77_csscal( &n, &rec, &x[0], &ione ); *scale *= rec; xmax *= rec; } x[j] = x[j] / tjjs; } else { /* A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and */ /* scale = 0 and compute a solution to A**H *x = 0. */ for( i = 0; i < n; ++i ) { x[i] = c_zero; } x[j] = c_one; *scale = 0.; xmax = 0.; } L210: ; } else { /* Compute x(j) := x(j) / A(j,j) - CSUMJ if the dot */ /* product has already been divided by 1/A(j,j). */ x[j] = (x[j] / tjjs) - csumj; } xmax = max( xmax, MAGMA_C_ABS1( x[j] )); } } *scale /= tscal; /* Scale the column norms by 1/TSCAL for return. */ if ( tscal != 1. ) { float d = 1. / tscal; blasf77_sscal( &n, &d, &cnorm[0], &ione ); } return *info; } /* end clatrsd */
DLLEXPORT openblas_complex_float c_dot_product(const blasint n, const openblas_complex_float x[], const openblas_complex_float y[]) { openblas_complex_float ret; cblas_cdotu_sub(n, (float*)x, 1, (float*)y, 1, &ret); return ret; }
void WRAP_F77(veclib_cdotu)(const int *N, const complex float *X, const int *incX, const complex float *Y, const int *incY, complex float* dotu) { cblas_cdotu_sub(*N, X, *incX, Y, *incY, dotu); }