/*
 * Double-complex dot product of x and y, written to *rho.
 *
 * When BLIS_ENABLE_CBLAS_INTERFACES is defined the call is forwarded to
 * CBLAS: cblas_zdotc_sub() if bli_is_conj( conj ) is true, otherwise
 * cblas_zdotu_sub().  Without that macro the work is handed to
 * bli_zdot_in().
 */
void bli_zdot( conj_t conj, int n, dcomplex* x, int incx, dcomplex* y, int incy, dcomplex* rho )
{
#ifdef BLIS_ENABLE_CBLAS_INTERFACES
	/* No conjugation requested: plain (unconjugated) product. */
	if ( !bli_is_conj( conj ) )
	{
		cblas_zdotu_sub( n, x, incx, y, incy, rho );
		return;
	}

	/* Conjugation requested. */
	cblas_zdotc_sub( n, x, incx, y, incy, rho );
#else
	bli_zdot_in( conj, n, x, incx, y, incy, rho );
#endif
}
/*
 * Computes the unconjugated dot product of the first five elements of
 * vec and d20 (both traversed with increment INTONE) and returns its
 * real part.
 *
 * The result is expected to be purely real: if the magnitude of the
 * imaginary part exceeds 1e-8 an error is printed to stderr and the
 * process exits with status 1.
 */
double wig20rot(complx *vec, complx *d20)
{
  complx dot;

  cblas_zdotu_sub(5, vec, INTONE, d20, INTONE, &dot);

  if ( fabs(dot.im) > 1e-8 )
  {
    fprintf(stderr,"wig20rot error: result is not pure real\n");
    exit(1);
  }

  return dot.re;
}
/*
 * Dot product of two complex-double vectors, accelerated with BLAS when
 * possible.
 *
 *   a, b              input vectors
 *   stridea, strideb  byte strides between consecutive elements
 *   res               receives the result
 *   n                 number of elements
 *   tmp               passed through unchanged to the fallback routine
 *
 * cblas_zdotu_sub() takes the length and the increments as `int` element
 * counts, so it is only used when
 *   - each byte stride is a non-negative whole multiple of
 *     sizeof(cdouble) that survives the round trip through `int`, and
 *   - n itself fits in an `int`.
 * In every other case the previously registered generic implementation
 * oldFunctions[PyArray_CDOUBLE] is called with the original arguments.
 */
static void
CDOUBLE_dot(void *a, intp stridea, void *b, intp strideb, void *res,
            intp n, void *tmp)
{
    int na = stridea / sizeof(cdouble);
    int nb = strideb / sizeof(cdouble);

    if ((sizeof(cdouble) * na == stridea) &&
        (sizeof(cdouble) * nb == strideb) &&
        (na >= 0) && (nb >= 0) &&
        /* reject lengths that would be truncated by the cast below */
        ((intp)(int)n == n))
        cblas_zdotu_sub((int)n, (double *)a, na, (double *)b, nb,
                        (double *)res);
    else
        oldFunctions[PyArray_CDOUBLE](a, stridea, b, strideb, res, n, tmp);
}
/*
 * Unconjugated complex dot product of X and Y, stored in *dotu.
 *
 * Build-time selection:
 *   NOBLAS defined          -> plain loop over N elements, advancing X by
 *                              incX and Y by incY after each product
 *   SINGLEPRECISION defined -> cblas_cdotu_sub()
 *   otherwise               -> cblas_zdotu_sub()
 */
void phi_dotu_sub(const int N, const Complex *X, const int incX, const Complex *Y, const int incY, Complex *dotu){
#ifdef NOBLAS
  int k;
  *dotu = 0;
  for(k = 0; k < N; ++k){
    *dotu += (*X)*(*Y);
    X += incX;
    Y += incY;
  }
#elif defined(SINGLEPRECISION)
  cblas_cdotu_sub(N,X,incX,Y,incY,dotu);
#else
  cblas_zdotu_sub(N,X,incX,Y,incY,dotu);
#endif
}
// Returns the unconjugated dot product of this vector with x.
// x must dynamically be a HostVector<std::complex<double> > with the same
// size_ as this vector; both conditions are enforced with assert().
// The product itself is delegated to cblas_zdotu_sub() with unit stride.
std::complex<double> HostVector<std::complex<double> >::DotNonConj(const BaseVector<std::complex<double> > &x) const {

  assert(&x != NULL);

  const HostVector<std::complex<double> > *rhs =
    dynamic_cast<const HostVector<std::complex<double> >*> (&x);

  assert(rhs != NULL);
  assert(this->size_ == rhs->size_);

  std::complex<double> dot;
  cblas_zdotu_sub(this->size_, this->vec_, 1, rhs->vec_, 1, &dot);

  return dot;

}
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaDoubleComplex *A, *B; double diff, error; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t m, n, k, size, maxn, ld; magmaDoubleComplex x2_m, x2_c; // complex x for magma, cblas/fortran blas respectively double x_m, x_c; // x for magma, cblas/fortran blas respectively magma_opts opts; parse_opts( argc, argv, &opts ); opts.tolerance = max( 100., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); gTol = tol; printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "\n" "Diff compares MAGMA wrapper to CBLAS and BLAS function; should be exactly 0.\n" "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n" "\n" ); double total_diff = 0.; double total_error = 0.; int inc[] = { 1 }; //{ -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; int ninc = sizeof(inc)/sizeof(*inc); for( int itest = 0; itest < opts.ntest; ++itest ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; printf("=========================================================================\n"); printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n", (int) m, (int) n, (int) k, (int) incx, (int) incy ); printf( "Function MAGMA CBLAS BLAS Diff Error\n" " msec msec msec\n" ); // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) ); ld = max( 1, maxn ); size = ld*maxn; magma_zmalloc_pinned( &A, size ); assert( A != NULL ); magma_zmalloc_pinned( &B, size ); assert( B != NULL ); // initialize 
matrices lapackf77_zlarnv( &ione, ISEED, &size, A ); lapackf77_zlarnv( &ione, ISEED, &size, B ); printf( "Level 1 BLAS ----------------------------------------------------------\n" ); // ----- test DZASUM // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dzasum( m, A(0,j), incx ); x_c = cblas_dzasum( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dzasum( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dzasum", diff, error ); total_diff += diff; total_error += error; } // ----- test DZNRM2 // get two-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dznrm2( m, A(0,j), incx ); x_c = cblas_dznrm2( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dznrm2( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dznrm2", diff, error ); total_diff += diff; total_error += error; } // ----- test ZDOTC // dot columns, Aj^H Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! 
defined( __APPLE__ ) x2_c = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotc", diff, error ); total_diff += diff; total_error += error; total_error += error; // ----- test ZDOTU // dot columns, Aj^T * Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotu", diff, error ); total_diff += diff; total_error += error; // tell user about disabled functions #if defined( MAGMA_WITH_MKL ) printf( "cblas_zdotc and cblas_zdotu disabled with MKL (segfaults)\n" ); #endif #if defined( __APPLE__ ) printf( "blasf77_zdotc and blasf77_zdotu disabled on MacOS (segfaults)\n" ); #endif // cleanup magma_free_pinned( A ); magma_free_pinned( B ); fflush( stdout ); }}} // itest, incx, incy // TODO use average error? printf( "sum diffs = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n" "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n", total_diff, total_error ); if ( total_diff != 0. ) { printf( "some tests failed diff == 0.; see above.\n" ); } else { printf( "all tests passed diff == 0.\n" ); } TESTING_FINALIZE(); int status = (total_diff != 0.); return status; }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaDoubleComplex *A, *B; double error_cblas, error_fblas, error_inline; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t i, j, k, m, n, size, maxn, ld; // complex x for magma, cblas, fortran, inline blas respectively magmaDoubleComplex x2_m, x2_c, x2_f, x2_i; // real x for magma, cblas, fortran, inline blas respectively double x_m, x_c, x_f, x_i; MAGMA_UNUSED( x_c ); MAGMA_UNUSED( x_f ); MAGMA_UNUSED( x2_c ); MAGMA_UNUSED( x2_f ); MAGMA_UNUSED( x2_m ); magma_opts opts; opts.parse_opts( argc, argv ); opts.tolerance = max( 100., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); gTol = tol; magma_int_t inc[] = { -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; magma_int_t ninc = sizeof(inc)/sizeof(*inc); magma_int_t maxinc = 0; for( i=0; i < ninc; ++i ) { maxinc = max( maxinc, abs(inc[i]) ); } printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfaults), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "!! If MAGMA_WITH_MKL or __APPLE__ are defined, known failures are skipped. !!\n" "\n" ); // tell user about disabled functions #ifndef HAVE_CBLAS printf( "n/a: HAVE_CBLAS not defined, so no cblas functions tested.\n\n" ); #endif #if defined(MAGMA_WITH_MKL) printf( "n/a: cblas_zdotc, cblas_zdotu, blasf77_zdotc, and blasf77_zdotu are disabled with MKL, due to segfaults.\n\n" ); #endif #if defined(__APPLE__) printf( "n/a: blasf77_zdotc and blasf77_zdotu are disabled on MacOS, due to segfaults.\n\n" ); #endif printf( "%% Error w.r.t. Error w.r.t. 
Error w.r.t.\n" "%% M N K incx incy Function CBLAS Fortran BLAS inline\n" "%%====================================================================================\n" ); for( int itest = 0; itest < opts.ntest; ++itest ) { if ( itest > 0 ) { printf( "%%----------------------------------------------------------------------\n" ); } m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * maxinc; ld = max( 1, maxn ); size = ld*maxn; TESTING_MALLOC_CPU( A, magmaDoubleComplex, size ); TESTING_MALLOC_CPU( B, magmaDoubleComplex, size ); // initialize matrices lapackf77_zlarnv( &ione, ISEED, &size, A ); lapackf77_zlarnv( &ione, ISEED, &size, B ); // ----- test DZASUM for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { x_m = magma_cblas_dzasum( m, A(0,j), incx ); #ifdef HAVE_CBLAS x_c = cblas_dzasum( m, A(0,j), incx ); error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); #else x_c = 0; error_cblas = SKIPPED_FLAG; #endif x_f = blasf77_dzasum( &m, A(0,j), &incx ); error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); // inline implementation x_i = 0; for( i=0; i < m; ++i ) { x_i += MAGMA_Z_ABS1( *A(i*incx,j) ); // |real(Aij)| + |imag(Aij)| } error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); //printf( "dzasum xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); } output( "dzasum", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } } printf( "\n" ); // ----- test DZNRM2 // get two-norm of column j of A for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = 
inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; if ( incx > 0 && incx == incy ) { // positive, no incy error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { x_m = magma_cblas_dznrm2( m, A(0,j), incx ); #ifdef HAVE_CBLAS x_c = cblas_dznrm2( m, A(0,j), incx ); error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); #else x_c = 0; error_cblas = SKIPPED_FLAG; #endif x_f = blasf77_dznrm2( &m, A(0,j), &incx ); error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); // inline implementation (poor -- doesn't scale) x_i = 0; for( i=0; i < m; ++i ) { x_i += real( *A(i*incx,j) ) * real( *A(i*incx,j) ) + imag( *A(i*incx,j) ) * imag( *A(i*incx,j) ); // same: real( conj( *A(i*incx,j) ) * *A(i*incx,j) ); } x_i = sqrt( x_i ); error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); //printf( "dznrm2 xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); } output( "dznrm2", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } } printf( "\n" ); // ----- test ZDOTC // dot columns, Aj^H Bj for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); // crashes with MKL 11.1.2, ILP64 #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) #ifdef COMPLEX cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); #endif error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); #else x2_c = MAGMA_Z_ZERO; error_cblas = SKIPPED_FLAG; #endif // crashes with MKL 11.2.3 and MacOS 10.9 #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! 
defined(__APPLE__) x2_f = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); #else x2_f = MAGMA_Z_ZERO; error_fblas = SKIPPED_FLAG; #endif // inline implementation x2_i = MAGMA_Z_ZERO; magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx); magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); for( i=0; i < m; ++i ) { x2_i += conj( *A(A_offset + i*incx,j) ) * *B(B_offset + i*incy,j); } error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); //printf( "zdotc xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", // real(x2_m), imag(x2_m), // real(x2_c), imag(x2_c), // real(x2_f), imag(x2_f), // real(x2_i), imag(x2_i) ); } output( "zdotc", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } printf( "\n" ); // ----- test ZDOTU // dot columns, Aj^T * Bj for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); // crashes with MKL 11.1.2, ILP64 #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) #ifdef COMPLEX cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); #endif error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); #else x2_c = MAGMA_Z_ZERO; error_cblas = SKIPPED_FLAG; #endif // crashes with MKL 11.2.3 and MacOS 10.9 #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__) x2_f = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); #else x2_f = MAGMA_Z_ZERO; error_fblas = SKIPPED_FLAG; #endif // inline implementation x2_i = MAGMA_Z_ZERO; magma_int_t A_offset = (incx > 0 ? 
0 : (-n + 1)*incx); magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); for( i=0; i < m; ++i ) { x2_i += *A(A_offset + i*incx,j) * *B(B_offset + i*incy,j); } error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); //printf( "zdotu xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", // real(x2_m), imag(x2_m), // real(x2_c), imag(x2_c), // real(x2_f), imag(x2_f), // real(x2_i), imag(x2_i) ); } output( "zdotu", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } // cleanup TESTING_FREE_CPU( A ); TESTING_FREE_CPU( B ); fflush( stdout ); } // itest, incx, incy opts.cleanup(); TESTING_FINALIZE(); return gStatus; }
/*
 * Exercises the CBLAS dot-product family on one-element vectors.
 *
 * Every case uses N = 1 and one of the increment pairs (1,-1), (-1,1) or
 * (-1,-1), and compares the routine's result against a hard-coded expected
 * value with gsl_test_rel().  Single-precision results are checked to a
 * relative tolerance of flteps, double-precision results to dbleps.
 * Complex operands and results are stored as {real, imag} pairs.
 *
 *   cases  1- 9 : cblas_sdsdot with alpha = 0.0, 0.1 and 1.0
 *   cases 10-15 : sdot, ddot, cdotu, cdotc, zdotu, zdotc with incX=1,  incY=-1
 *   cases 16-21 : the same six routines                  with incX=-1, incY=1
 *   cases 22-27 : the same six routines                  with incX=-1, incY=-1
 */
void
test_dot (void) {
const double flteps = 1e-4, dbleps = 1e-6;
  /* ----- sdsdot, incX = 1, incY = -1 ----- */
  {
   int N = 1;
   float alpha = 0.0f;
   float X[] = { 0.733f };
   float Y[] = { 0.825f };
   int incX = 1;
   int incY = -1;
   float expected = 0.604725f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 1)");
  };

  {
   int N = 1;
   float alpha = 0.1f;
   float X[] = { 0.733f };
   float Y[] = { 0.825f };
   int incX = 1;
   int incY = -1;
   float expected = 0.704725f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 2)");
  };

  {
   int N = 1;
   float alpha = 1.0f;
   float X[] = { 0.733f };
   float Y[] = { 0.825f };
   int incX = 1;
   int incY = -1;
   float expected = 1.604725f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 3)");
  };

  /* ----- sdsdot, incX = -1, incY = 1 ----- */
  {
   int N = 1;
   float alpha = 0.0f;
   float X[] = { -0.812f };
   float Y[] = { -0.667f };
   int incX = -1;
   int incY = 1;
   float expected = 0.541604f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 4)");
  };

  {
   int N = 1;
   float alpha = 0.1f;
   float X[] = { -0.812f };
   float Y[] = { -0.667f };
   int incX = -1;
   int incY = 1;
   float expected = 0.641604f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 5)");
  };

  {
   int N = 1;
   float alpha = 1.0f;
   float X[] = { -0.812f };
   float Y[] = { -0.667f };
   int incX = -1;
   int incY = 1;
   float expected = 1.541604f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 6)");
  };

  /* ----- sdsdot, incX = -1, incY = -1 ----- */
  {
   int N = 1;
   float alpha = 0.0f;
   float X[] = { 0.481f };
   float Y[] = { 0.523f };
   int incX = -1;
   int incY = -1;
   float expected = 0.251563f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 7)");
  };

  {
   int N = 1;
   float alpha = 0.1f;
   float X[] = { 0.481f };
   float Y[] = { 0.523f };
   int incX = -1;
   int incY = -1;
   float expected = 0.351563f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 8)");
  };

  {
   int N = 1;
   float alpha = 1.0f;
   float X[] = { 0.481f };
   float Y[] = { 0.523f };
   int incX = -1;
   int incY = -1;
   float expected = 1.251563f;
   float f;
   f = cblas_sdsdot (N, alpha, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdsdot(case 9)");
  };

  /* ----- sdot / ddot / cdotu / cdotc / zdotu / zdotc, incX = 1, incY = -1 ----- */
  {
   int N = 1;
   float X[] = { 0.785f };
   float Y[] = { -0.7f };
   int incX = 1;
   int incY = -1;
   float expected = -0.5495f;
   float f;
   f = cblas_sdot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdot(case 10)");
  };

  {
   int N = 1;
   double X[] = { 0.79 };
   double Y[] = { -0.679 };
   int incX = 1;
   int incY = -1;
   double expected = -0.53641;
   double f;
   f = cblas_ddot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, dbleps, "ddot(case 11)");
  };

  {
   int N = 1;
   float X[] = { 0.474f, -0.27f };
   float Y[] = { -0.144f, -0.392f };
   int incX = 1;
   int incY = -1;
   float expected[2] = {-0.174096f, -0.146928f};
   float f[2];
   cblas_cdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 12) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 12) imag");
  };

  {
   int N = 1;
   float X[] = { 0.474f, -0.27f };
   float Y[] = { -0.144f, -0.392f };
   int incX = 1;
   int incY = -1;
   float expected[2] = {0.037584f, -0.224688f};
   float f[2];
   cblas_cdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 13) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 13) imag");
  };

  {
   int N = 1;
   double X[] = { -0.87, -0.631 };
   double Y[] = { -0.7, -0.224 };
   int incX = 1;
   int incY = -1;
   double expected[2] = {0.467656, 0.63658};
   double f[2];
   cblas_zdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 14) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 14) imag");
  };

  {
   int N = 1;
   double X[] = { -0.87, -0.631 };
   double Y[] = { -0.7, -0.224 };
   int incX = 1;
   int incY = -1;
   double expected[2] = {0.750344, -0.24682};
   double f[2];
   cblas_zdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 15) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 15) imag");
  };

  /* ----- sdot / ddot / cdotu / cdotc / zdotu / zdotc, incX = -1, incY = 1 ----- */
  {
   int N = 1;
   float X[] = { -0.457f };
   float Y[] = { 0.839f };
   int incX = -1;
   int incY = 1;
   float expected = -0.383423f;
   float f;
   f = cblas_sdot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdot(case 16)");
  };

  {
   int N = 1;
   double X[] = { 0.949 };
   double Y[] = { -0.873 };
   int incX = -1;
   int incY = 1;
   double expected = -0.828477;
   double f;
   f = cblas_ddot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, dbleps, "ddot(case 17)");
  };

  {
   int N = 1;
   float X[] = { 0.852f, -0.045f };
   float Y[] = { 0.626f, -0.164f };
   int incX = -1;
   int incY = 1;
   float expected[2] = {0.525972f, -0.167898f};
   float f[2];
   cblas_cdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 18) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 18) imag");
  };

  {
   int N = 1;
   float X[] = { 0.852f, -0.045f };
   float Y[] = { 0.626f, -0.164f };
   int incX = -1;
   int incY = 1;
   float expected[2] = {0.540732f, -0.111558f};
   float f[2];
   cblas_cdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 19) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 19) imag");
  };

  {
   int N = 1;
   double X[] = { -0.786, -0.341 };
   double Y[] = { -0.271, -0.896 };
   int incX = -1;
   int incY = 1;
   double expected[2] = {-0.09253, 0.796667};
   double f[2];
   cblas_zdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 20) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 20) imag");
  };

  {
   int N = 1;
   double X[] = { -0.786, -0.341 };
   double Y[] = { -0.271, -0.896 };
   int incX = -1;
   int incY = 1;
   double expected[2] = {0.518542, 0.611845};
   double f[2];
   cblas_zdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 21) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 21) imag");
  };

  /* ----- sdot / ddot / cdotu / cdotc / zdotu / zdotc, incX = -1, incY = -1 ----- */
  {
   int N = 1;
   float X[] = { -0.088f };
   float Y[] = { -0.165f };
   int incX = -1;
   int incY = -1;
   float expected = 0.01452f;
   float f;
   f = cblas_sdot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, flteps, "sdot(case 22)");
  };

  {
   int N = 1;
   double X[] = { -0.434 };
   double Y[] = { -0.402 };
   int incX = -1;
   int incY = -1;
   double expected = 0.174468;
   double f;
   f = cblas_ddot(N, X, incX, Y, incY);
   gsl_test_rel(f, expected, dbleps, "ddot(case 23)");
  };

  {
   int N = 1;
   float X[] = { -0.347f, 0.899f };
   float Y[] = { -0.113f, -0.858f };
   int incX = -1;
   int incY = -1;
   float expected[2] = {0.810553f, 0.196139f};
   float f[2];
   cblas_cdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotu(case 24) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotu(case 24) imag");
  };

  {
   int N = 1;
   float X[] = { -0.347f, 0.899f };
   float Y[] = { -0.113f, -0.858f };
   int incX = -1;
   int incY = -1;
   float expected[2] = {-0.732131f, 0.399313f};
   float f[2];
   cblas_cdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], flteps, "cdotc(case 25) real");
   gsl_test_rel(f[1], expected[1], flteps, "cdotc(case 25) imag");
  };

  {
   int N = 1;
   double X[] = { -0.897, -0.204 };
   double Y[] = { -0.759, 0.557 };
   int incX = -1;
   int incY = -1;
   double expected[2] = {0.794451, -0.344793};
   double f[2];
   cblas_zdotu_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotu(case 26) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotu(case 26) imag");
  };

  {
   int N = 1;
   double X[] = { -0.897, -0.204 };
   double Y[] = { -0.759, 0.557 };
   int incX = -1;
   int incY = -1;
   double expected[2] = {0.567195, -0.654465};
   double f[2];
   cblas_zdotc_sub(N, X, incX, Y, incY, &f);
   gsl_test_rel(f[0], expected[0], dbleps, "zdotc(case 27) real");
   gsl_test_rel(f[1], expected[1], dbleps, "zdotc(case 27) imag");
  };

}
/*
 * Pass-through to cblas_zdotu_sub(): all six arguments are forwarded
 * unchanged and the result is written through dotu by the callee.
 */
void wrapper_cblas_zdotu_sub(const int N,
                             const void *X, const int incX,
                             const void *Y, const int incY,
                             void *dotu)
{
    cblas_zdotu_sub(N, X, incX, Y, incY, dotu);
    return;
}
int main() { double time; int size = N; int one = 1; // double* da = (double*) malloc(2*N*sizeof(double)); double* db = (double*) malloc(2*N*sizeof(double)); double* cc = (double*) malloc(2* sizeof(double)); // for (int ii = 0; ii < 2*N; ii = ii + 2) { da[ii + 0] = 0; //rand()%10; // real da[ii + 1] = 1; //rand()%10; // imaginary // db[ii + 0] = 2; //rand()%10; // real db[ii + 1] = 3; //rand()%10; // imaginary } // cc[0] = 0.0; // real cc[1] = 0.0; // imaginary // time = -myseconds(); asm volatile("# mkl"); zdotu_aos(N, da, one, db, one, cc); time += myseconds(); // assert(cc[1] != (double) N); printf( "AOS: The complex dot product is: (%6.2f, %6.2f), bandwidth = %f GB/s, perf = %f Gflops/s (%f s.)\n", cc[0], cc[1], 2*N*8/1024./1024/1024/time, 2.*N/1e9/time, time ); // time = -myseconds(); cblas_zdotu_sub(N, da, one, db, one, cc); time += myseconds(); printf( "MKL: The complex dot product is: (%6.2f, %6.2f), bandwidth = %f GB/s, perf = %f Gflops/s (%f s.)\n", cc[0], cc[1], 2*N*8/1024./1024/1024/time, 2.*N/1e9/time, time ); free(da); free(db); // da = (double*) malloc(N*sizeof(double)); db = (double*) malloc(N*sizeof(double)); double* dc = (double*) malloc(N*sizeof(double)); double* dd = (double*) malloc(N*sizeof(double)); // for (int ii = 0; ii < N; ++ii) { da[ii] = 0; //rand()%10; // real db[ii] = 1; //rand()%10; // imaginary // dc[ii] = 2; //rand()%10; // real dd[ii] = 3; //rand()%10; // imaginary } // time = -myseconds(); asm volatile("# mkl"); zdotu_soa(N, da, db, one, dc, dd, one, cc); time += myseconds(); // assert(cc[1] != (double) N); printf( "SOA: The complex dot product is: (%6.2f, %6.2f), bandwidth = %f GB/s, perf = %f Gflops/s (%f s.)\n", cc[0], cc[1], 2*N*8/1024./1024/1024/time, 2.*N/1e9/time, time ); // free(da); free(db); free(dc); free(dd); // free(cc); }
/*
 * By-reference entry point for cblas_zdotu_sub(): N, incX and incY arrive
 * as pointers and are dereferenced before the call; X, Y and dotu are
 * forwarded unchanged.
 */
void WRAP_F77(acc_zdotu_sub)(const int *N, const void *X, const int *incX, const void *Y, const int *incY, void *dotu)
{
    const int len = *N;
    const int strideX = *incX;
    const int strideY = *incY;

    cblas_zdotu_sub(len, X, strideX, Y, strideY, dotu);
}
/*
 * Value-returning zdotu: dereferences N, INCX and INCY, calls
 * cblas_zdotu_sub() and returns the complex result it produced.
 */
complex double zdotu_(int *N, void *CX, int *INCX, void *CY, int *INCY)
{
    complex double result;

    cblas_zdotu_sub(*N, CX, *INCX, CY, *INCY, &result);

    return result;
}
/*
 * By-reference wrapper around cblas_zdotu_sub(): the length and both
 * increments are passed as pointers and dereferenced here; the result is
 * written through dotu by the callee.
 */
void F77_zdotu(const int *N, void *X, const int *incX,
               void *Y, const int *incY, void *dotu)
{
    const int len = *N;
    const int strideX = *incX;
    const int strideY = *incY;

    cblas_zdotu_sub(len, X, strideX, Y, strideY, dotu);
}
// // Overloaded function for dispatching to // * CBLAS backend, and // * complex<double> value-type. // inline std::complex<double> dot( const int n, const std::complex<double>* x, const int incx, const std::complex<double>* y, const int incy ) { std::complex<double> result; cblas_zdotu_sub( n, x, incx, y, incy, &result ); return result; }
/*
 * Unconjugated dot product of the n-element complex arrays x and y,
 * both read with unit stride via cblas_zdotu_sub().
 */
DLLEXPORT openblas_complex_double z_dot_product(const blasint n, const openblas_complex_double x[], const openblas_complex_double y[]) {
    openblas_complex_double sum;
    double *xs = (double*)x;
    double *ys = (double*)y;

    cblas_zdotu_sub(n, xs, 1, ys, 1, &sum);

    return sum;
}
/*
 * By-reference wrapper around cblas_zdotu_sub(): N, incX and incY are
 * dereferenced, the vectors are forwarded unchanged and the result is
 * stored in *dotu by the callee.
 */
void WRAP_F77(veclib_zdotu)(const int *N, const double complex *X, const int *incX, const double complex *Y, const int *incY, double complex *dotu)
{
    const int len = *N;
    const int strideX = *incX;
    const int strideY = *incY;

    cblas_zdotu_sub(len, X, strideX, Y, strideY, dotu);
}