void eblas_dznrm2_sub(size_t iStart, size_t iStop, const complex* x, int incx, double* ret, std::mutex* lock) { //Compute this thread's contribution: double retSub = cblas_dznrm2(iStop-iStart, x+incx*iStart, incx); //Accumulate over threads (need sync): lock->lock(); *ret += retSub*retSub; lock->unlock(); }
double eblas_dznrm2(int N, const complex* x, int incx) { #ifdef MKL_PROVIDES_BLAS return cblas_dznrm2(N, x, incx); #else double ret = 0.; std::mutex lock; threadLaunch((N<100000) ? 1 : 0, eblas_dznrm2_sub, N, x, incx, &ret, &lock); return sqrt(ret); #endif }
VALUE rb_blas_xnrm2(int argc, VALUE *argv, VALUE self) { Matrix *dx; int incx; int incy; int n; //char error_msg[64]; VALUE n_value, incx_value; rb_scan_args(argc, argv, "02", &incx_value, &n_value); Data_Get_Struct(self, Matrix, dx); if(incx_value == Qnil) incx = 1; else incx = NUM2INT(incx_value); if(n_value == Qnil) n = dx->nrows; else n = NUM2INT(n_value); if(dx == NULL || dx->ncols != 1) { //sprintf(error_msg, "Self is not a Vector"); rb_raise(rb_eRuntimeError, "Self is not a Vector"); } switch(dx->data_type) { case Single_t: //s return rb_float_new(cblas_snrm2(n , (float *)dx->data, incx)); case Double_t: //d return rb_float_new(cblas_dnrm2(n , (double *)dx->data, incx)); case Complex_t: //c return rb_float_new(cblas_scnrm2(n , dx->data, incx)); case Double_Complex_t: //z return rb_float_new(cblas_dznrm2(n , dx->data, incx)); default: //sprintf(error_msg, "Invalid data_type (%d) in Matrix", dx->data_type); rb_raise(rb_eRuntimeError, "Invalid data_type (%d) in Matrix", dx->data_type); return Qnil; //Never reaches here. } }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaDoubleComplex *A, *B; double diff, error; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t m, n, k, size, maxn, ld; magmaDoubleComplex x2_m, x2_c; // complex x for magma, cblas/fortran blas respectively double x_m, x_c; // x for magma, cblas/fortran blas respectively magma_opts opts; parse_opts( argc, argv, &opts ); opts.tolerance = max( 100., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); gTol = tol; printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "\n" "Diff compares MAGMA wrapper to CBLAS and BLAS function; should be exactly 0.\n" "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n" "\n" ); double total_diff = 0.; double total_error = 0.; int inc[] = { 1 }; //{ -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; int ninc = sizeof(inc)/sizeof(*inc); for( int itest = 0; itest < opts.ntest; ++itest ) { m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; printf("=========================================================================\n"); printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n", (int) m, (int) n, (int) k, (int) incx, (int) incy ); printf( "Function MAGMA CBLAS BLAS Diff Error\n" " msec msec msec\n" ); // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) ); ld = max( 1, maxn ); size = ld*maxn; magma_zmalloc_pinned( &A, size ); assert( A != NULL ); magma_zmalloc_pinned( &B, size ); assert( B != NULL ); // initialize matrices lapackf77_zlarnv( &ione, ISEED, &size, A ); lapackf77_zlarnv( &ione, ISEED, &size, B ); printf( "Level 1 BLAS ----------------------------------------------------------\n" ); // ----- test DZASUM // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dzasum( m, A(0,j), incx ); x_c = cblas_dzasum( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dzasum( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dzasum", diff, error ); total_diff += diff; total_error += error; } // ----- test DZNRM2 // get two-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy diff = 0; error = 0; for( int j = 0; j < k; ++j ) { x_m = magma_cblas_dznrm2( m, A(0,j), incx ); x_c = cblas_dznrm2( m, A(0,j), incx ); diff += fabs( x_m - x_c ); x_c = blasf77_dznrm2( &m, A(0,j), &incx ); error += fabs( (x_m - x_c) / (m*x_c) ); } output( "dznrm2", diff, error ); total_diff += diff; total_error += error; } // ----- test ZDOTC // dot columns, Aj^H Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotc", diff, error ); total_diff += diff; total_error += error; total_error += error; // ----- test ZDOTU // dot columns, Aj^T * Bj diff = 0; error = 0; for( int j = 0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); // crashes on MKL 11.1.2, ILP64 #if ! defined( MAGMA_WITH_MKL ) #ifdef COMPLEX cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); #endif error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif // crashes on MacOS 10.9 #if ! defined( __APPLE__ ) x2_c = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); error += fabs( x2_m - x2_c ) / fabs( m*x2_c ); #endif } output( "zdotu", diff, error ); total_diff += diff; total_error += error; // tell user about disabled functions #if defined( MAGMA_WITH_MKL ) printf( "cblas_zdotc and cblas_zdotu disabled with MKL (segfaults)\n" ); #endif #if defined( __APPLE__ ) printf( "blasf77_zdotc and blasf77_zdotu disabled on MacOS (segfaults)\n" ); #endif // cleanup magma_free_pinned( A ); magma_free_pinned( B ); fflush( stdout ); }}} // itest, incx, incy // TODO use average error? printf( "sum diffs = %8.2g, MAGMA wrapper compared to CBLAS and Fortran BLAS; should be exactly 0.\n" "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n", total_diff, total_error ); if ( total_diff != 0. ) { printf( "some tests failed diff == 0.; see above.\n" ); } else { printf( "all tests passed diff == 0.\n" ); } TESTING_FINALIZE(); int status = (total_diff != 0.); return status; }
// ---------------------------------------- int main( int argc, char** argv ) { TESTING_INIT(); //real_Double_t t_m, t_c, t_f; magma_int_t ione = 1; magmaDoubleComplex *A, *B; double error_cblas, error_fblas, error_inline; magma_int_t ISEED[4] = {0,0,0,1}; magma_int_t i, j, k, m, n, size, maxn, ld; // complex x for magma, cblas, fortran, inline blas respectively magmaDoubleComplex x2_m, x2_c, x2_f, x2_i; // real x for magma, cblas, fortran, inline blas respectively double x_m, x_c, x_f, x_i; MAGMA_UNUSED( x_c ); MAGMA_UNUSED( x_f ); MAGMA_UNUSED( x2_c ); MAGMA_UNUSED( x2_f ); MAGMA_UNUSED( x2_m ); magma_opts opts; opts.parse_opts( argc, argv ); opts.tolerance = max( 100., opts.tolerance ); double tol = opts.tolerance * lapackf77_dlamch("E"); gTol = tol; magma_int_t inc[] = { -2, -1, 1, 2 }; //{ 1 }; //{ -1, 1 }; magma_int_t ninc = sizeof(inc)/sizeof(*inc); magma_int_t maxinc = 0; for( i=0; i < ninc; ++i ) { maxinc = max( maxinc, abs(inc[i]) ); } printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfaults), which !!\n" "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA. !!\n" "!! If MAGMA_WITH_MKL or __APPLE__ are defined, known failures are skipped. !!\n" "\n" ); // tell user about disabled functions #ifndef HAVE_CBLAS printf( "n/a: HAVE_CBLAS not defined, so no cblas functions tested.\n\n" ); #endif #if defined(MAGMA_WITH_MKL) printf( "n/a: cblas_zdotc, cblas_zdotu, blasf77_zdotc, and blasf77_zdotu are disabled with MKL, due to segfaults.\n\n" ); #endif #if defined(__APPLE__) printf( "n/a: blasf77_zdotc and blasf77_zdotu are disabled on MacOS, due to segfaults.\n\n" ); #endif printf( "%% Error w.r.t. Error w.r.t. Error w.r.t.\n" "%% M N K incx incy Function CBLAS Fortran BLAS inline\n" "%%====================================================================================\n" ); for( int itest = 0; itest < opts.ntest; ++itest ) { if ( itest > 0 ) { printf( "%%----------------------------------------------------------------------\n" ); } m = opts.msize[itest]; n = opts.nsize[itest]; k = opts.ksize[itest]; // allocate matrices // over-allocate so they can be any combination of // {m,n,k} * {abs(incx), abs(incy)} by // {m,n,k} * {abs(incx), abs(incy)} maxn = max( max( m, n ), k ) * maxinc; ld = max( 1, maxn ); size = ld*maxn; TESTING_MALLOC_CPU( A, magmaDoubleComplex, size ); TESTING_MALLOC_CPU( B, magmaDoubleComplex, size ); // initialize matrices lapackf77_zlarnv( &ione, ISEED, &size, A ); lapackf77_zlarnv( &ione, ISEED, &size, B ); // ----- test DZASUM for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; // get one-norm of column j of A if ( incx > 0 && incx == incy ) { // positive, no incy error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { x_m = magma_cblas_dzasum( m, A(0,j), incx ); #ifdef HAVE_CBLAS x_c = cblas_dzasum( m, A(0,j), incx ); error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); #else x_c = 0; error_cblas = SKIPPED_FLAG; #endif x_f = blasf77_dzasum( &m, A(0,j), &incx ); error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); // inline implementation x_i = 0; for( i=0; i < m; ++i ) { x_i += MAGMA_Z_ABS1( *A(i*incx,j) ); // |real(Aij)| + |imag(Aij)| } error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); //printf( "dzasum xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); } output( "dzasum", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } } printf( "\n" ); // ----- test DZNRM2 // get two-norm of column j of A for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; if ( incx > 0 && incx == incy ) { // positive, no incy error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { x_m = magma_cblas_dznrm2( m, A(0,j), incx ); #ifdef HAVE_CBLAS x_c = cblas_dznrm2( m, A(0,j), incx ); error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) ); #else x_c = 0; error_cblas = SKIPPED_FLAG; #endif x_f = blasf77_dznrm2( &m, A(0,j), &incx ); error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) ); // inline implementation (poor -- doesn't scale) x_i = 0; for( i=0; i < m; ++i ) { x_i += real( *A(i*incx,j) ) * real( *A(i*incx,j) ) + imag( *A(i*incx,j) ) * imag( *A(i*incx,j) ); // same: real( conj( *A(i*incx,j) ) * *A(i*incx,j) ); } x_i = sqrt( x_i ); error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) ); //printf( "dznrm2 xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i ); } output( "dznrm2", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } } printf( "\n" ); // ----- test ZDOTC // dot columns, Aj^H Bj for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); // crashes with MKL 11.1.2, ILP64 #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) #ifdef COMPLEX cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy ); #endif error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); #else x2_c = MAGMA_Z_ZERO; error_cblas = SKIPPED_FLAG; #endif // crashes with MKL 11.2.3 and MacOS 10.9 #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__) x2_f = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy ); error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); #else x2_f = MAGMA_Z_ZERO; error_fblas = SKIPPED_FLAG; #endif // inline implementation x2_i = MAGMA_Z_ZERO; magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx); magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); for( i=0; i < m; ++i ) { x2_i += conj( *A(A_offset + i*incx,j) ) * *B(B_offset + i*incy,j); } error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); //printf( "zdotc xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", // real(x2_m), imag(x2_m), // real(x2_c), imag(x2_c), // real(x2_f), imag(x2_f), // real(x2_i), imag(x2_i) ); } output( "zdotc", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } printf( "\n" ); // ----- test ZDOTU // dot columns, Aj^T * Bj for( int iincx = 0; iincx < ninc; ++iincx ) { magma_int_t incx = inc[iincx]; for( int iincy = 0; iincy < ninc; ++iincy ) { magma_int_t incy = inc[iincy]; error_cblas = 0; error_fblas = 0; error_inline = 0; for( j=0; j < k; ++j ) { // MAGMA implementation, not just wrapper x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); // crashes with MKL 11.1.2, ILP64 #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL) #ifdef COMPLEX cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c ); #else x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy ); #endif error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) ); #else x2_c = MAGMA_Z_ZERO; error_cblas = SKIPPED_FLAG; #endif // crashes with MKL 11.2.3 and MacOS 10.9 #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__) x2_f = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy ); error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) ); #else x2_f = MAGMA_Z_ZERO; error_fblas = SKIPPED_FLAG; #endif // inline implementation x2_i = MAGMA_Z_ZERO; magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx); magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy); for( i=0; i < m; ++i ) { x2_i += *A(A_offset + i*incx,j) * *B(B_offset + i*incy,j); } error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) ); //printf( "zdotu xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n", // real(x2_m), imag(x2_m), // real(x2_c), imag(x2_c), // real(x2_f), imag(x2_f), // real(x2_i), imag(x2_i) ); } output( "zdotu", m, n, k, incx, incy, error_cblas, error_fblas, error_inline ); } } // cleanup TESTING_FREE_CPU( A ); TESTING_FREE_CPU( B ); fflush( stdout ); } // itest, incx, incy opts.cleanup(); TESTING_FINALIZE(); return gStatus; }
extern "C" magma_int_t magma_zgeev(magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n, magmaDoubleComplex *a, magma_int_t lda, magmaDoubleComplex *geev_w_array, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info, magma_queue_t queue) { /* -- clMAGMA (version 1.0.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver September 2012 Purpose ======= ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) COMPLEX*16 array, dimension (N) W contains the computed eigenvalues. VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (1+nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ magma_int_t c__1 = 1; magma_int_t c__0 = 0; magma_int_t a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, i__2, i__3; double d__1, d__2; magmaDoubleComplex z__1, z__2; magma_int_t i__, k, ihi; double scl; magma_int_t ilo; double dum[1], eps; magmaDoubleComplex tmp; magma_int_t ibal; double anrm; magma_int_t ierr, itau, iwrk, nout; magma_int_t scalea; double cscale; magma_int_t select[1]; double bignum; magma_int_t minwrk; magma_int_t wantvl; double smlnum; magma_int_t irwork; magma_int_t lquery, wantvr; magma_int_t nb = 0; magmaDoubleComplex_ptr dT; //magma_timestr_t start, end; char side[2] = {0, 0}; magma_vec_t jobvl_ = jobvl; magma_vec_t jobvr_ = jobvr; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame(lapack_const(jobvl_), "V"); wantvr = lapackf77_lsame(lapack_const(jobvr_), "V"); if (! wantvl && ! lapackf77_lsame(lapack_const(jobvl_), "N")) { *info = -1; } else if (! wantvr && ! lapackf77_lsame(lapack_const(jobvr_), "N")) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -8; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -10; } /* Compute workspace */ if (*info == 0) { nb = magma_get_zgehrd_nb(n); minwrk = (1+nb)*n; work[0] = MAGMA_Z_MAKE((double) minwrk, 0.); if (lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } // if eigenvectors are needed #if defined(VERSION3) if (MAGMA_SUCCESS != magma_malloc(&dT, nb*n*sizeof(magmaDoubleComplex) )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif a_dim1 = lda; a_offset = 1 + a_dim1; a -= a_offset; vl_dim1 = ldvl; vl_offset = 1 + vl_dim1; vl -= vl_offset; vr_dim1 = ldvr; vr_offset = 1 + vr_dim1; vr -= vr_offset; --work; --rwork; /* Get machine constants */ eps = lapackf77_dlamch("P"); smlnum = lapackf77_dlamch("S"); bignum = 1. / smlnum; lapackf77_dlabad(&smlnum, &bignum); smlnum = magma_dsqrt(smlnum) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_zlange("M", &n, &n, &a[a_offset], &lda, dum); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_zlascl("G", &c__0, &c__0, &anrm, &cscale, &n, &n, &a[a_offset], &lda, & ierr); } /* Balance the matrix (CWorkspace: none) (RWorkspace: need N) */ ibal = 1; lapackf77_zgebal("B", &n, &a[a_offset], &lda, &ilo, &ihi, &rwork[ibal], &ierr); /* Reduce to upper Hessenberg form (CWorkspace: need 2*N, prefer N+N*NB) (RWorkspace: none) */ itau = 1; iwrk = itau + n; i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) /* * Version 1 - LAPACK */ lapackf77_zgehrd(&n, &ilo, &ihi, &a[a_offset], &lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION2) /* * Version 2 - LAPACK consistent HRD */ magma_zgehrd2(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, */ magma_zgehrd(n, ilo, ihi, &a[a_offset], lda, &work[itau], &work[iwrk], i__1, dT, 0, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zgehrd = %5.2f sec\n", GetTimerValue(start,end)/1000.); if (wantvl) { /* Want left eigenvectors Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_zlacpy(MagmaLowerStr, &n, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl); /* Generate unitary matrix in VL (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_zunghr(&n, &ilo, &ihi, &vl[vl_offset], &ldvl, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_zunghr(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], dT, 0, nb, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* Perform QR iteration, accumulating Schur vectors in VL (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vl[vl_offset], &ldvl, &work[iwrk], &i__1, info); if (wantvr) { /* Want left and right eigenvectors Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_zlacpy("F", &n, &n, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr); } } else if (wantvr) { /* Want right eigenvectors Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_zlacpy("L", &n, &n, &a[a_offset], &lda, &vr[vr_offset], &ldvr); /* Generate unitary matrix in VR (CWorkspace: need 2*N-1, prefer N+(N-1)*NB) (RWorkspace: none) */ i__1 = lwork - iwrk + 1; //start = get_current_time(); #if defined(VERSION1) || defined(VERSION2) /* * Version 1 & 2 - LAPACK */ lapackf77_zunghr(&n, &ilo, &ihi, &vr[vr_offset], &ldvr, &work[itau], &work[iwrk], &i__1, &ierr); #elif defined(VERSION3) /* * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored */ magma_zunghr(n, ilo, ihi, &vr[vr_offset], ldvr, &work[itau], dT, 0, nb, &ierr, queue); #endif //end = get_current_time(); //printf(" Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.); /* Perform QR iteration, accumulating Schur vectors in VR (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } else { /* Compute eigenvalues only (CWorkspace: need 1, prefer HSWORK (see comments) ) (RWorkspace: none) */ iwrk = itau; i__1 = lwork - iwrk + 1; lapackf77_zhseqr("E", "N", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info); } /* If INFO > 0 from ZHSEQR, then quit */ if (*info > 0) { goto L50; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors (CWorkspace: need 2*N) (RWorkspace: need 2*N) */ irwork = ibal + n; lapackf77_ztrevc(side, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr); } if (wantvl) { /* Undo balancing of left eigenvectors (CWorkspace: none) (RWorkspace: need N) */ lapackf77_zgebak("B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, &vl[vl_offset], &ldvl, &ierr); /* Normalize left eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { scl = 1. / cblas_dznrm2(n, &vl[i__ * vl_dim1 + 1], 1); cblas_zdscal(n, scl, &vl[i__ * vl_dim1 + 1], 1); i__2 = n; for (k = 1; k <= i__2; ++k) { i__3 = k + i__ * vl_dim1; /* Computing 2nd power */ d__1 = MAGMA_Z_REAL(vl[i__3]); /* Computing 2nd power */ d__2 = MAGMA_Z_IMAG(vl[k + i__ * vl_dim1]); rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = cblas_idamax(n, &rwork[irwork], 1)+1; z__2 = MAGMA_Z_CNJG(vl[k + i__ * vl_dim1]); d__1 = magma_dsqrt(rwork[irwork + k - 1]); MAGMA_Z_DSCALE(z__1, z__2, d__1); tmp = z__1; cblas_zscal(n, CBLAS_SADDR(tmp), &vl[i__ * vl_dim1 + 1], 1); i__2 = k + i__ * vl_dim1; i__3 = k + i__ * vl_dim1; d__1 = MAGMA_Z_REAL(vl[i__3]); MAGMA_Z_SET2REAL(z__1, d__1); vl[i__2] = z__1; } } if (wantvr) { /* Undo balancing of right eigenvectors (CWorkspace: none) (RWorkspace: need N) */ lapackf77_zgebak("B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, &vr[vr_offset], &ldvr, &ierr); /* Normalize right eigenvectors and make largest component real */ for (i__ = 1; i__ <= n; ++i__) { scl = 1. / cblas_dznrm2(n, &vr[i__ * vr_dim1 + 1], 1); cblas_zdscal(n, scl, &vr[i__ * vr_dim1 + 1], 1); i__2 = n; for (k = 1; k <= i__2; ++k) { i__3 = k + i__ * vr_dim1; /* Computing 2nd power */ d__1 = MAGMA_Z_REAL(vr[i__3]); /* Computing 2nd power */ d__2 = MAGMA_Z_IMAG(vr[k + i__ * vr_dim1]); rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2; } /* Comment: Fortran BLAS does not have to add 1 C BLAS must add one to cblas_idamax */ k = cblas_idamax(n, &rwork[irwork], 1)+1; z__2 = MAGMA_Z_CNJG(vr[k + i__ * vr_dim1]); d__1 = magma_dsqrt(rwork[irwork + k - 1]); MAGMA_Z_DSCALE(z__1, z__2, d__1); tmp = z__1; cblas_zscal(n, CBLAS_SADDR(tmp), &vr[i__ * vr_dim1 + 1], 1); i__2 = k + i__ * vr_dim1; i__3 = k + i__ * vr_dim1; d__1 = MAGMA_Z_REAL(vr[i__3]); MAGMA_Z_SET2REAL(z__1, d__1); vr[i__2] = z__1; } } /* Undo scaling if necessary */ L50: if (scalea) { i__1 = n - *info; /* Computing MAX */ i__3 = n - *info; i__2 = max(i__3,1); lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, geev_w_array + *info, &i__2, &ierr); if (*info > 0) { i__1 = ilo - 1; lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, geev_w_array, &n, &ierr); } } #if defined(VERSION3) magma_free( dT ); #endif return *info; } /* magma_zgeev */
double F77_dznrm2(const int *N, const void *X, const int *incX) { return cblas_dznrm2(*N, X, *incX); }
extern "C" magma_int_t magma_zgeev_m( char jobvl, char jobvr, magma_int_t n, magmaDoubleComplex *A, magma_int_t lda, magmaDoubleComplex *W, magmaDoubleComplex *vl, magma_int_t ldvl, magmaDoubleComplex *vr, magma_int_t ldvr, magmaDoubleComplex *work, magma_int_t lwork, double *rwork, magma_int_t *info ) { /* -- MAGMA (version 1.4.1) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver December 2013 Purpose ======= ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the eigenvalues and, optionally, the left and/or right eigenvectors. The right eigenvector v(j) of A satisfies A * v(j) = lambda(j) * v(j) where lambda(j) is its eigenvalue. The left eigenvector u(j) of A satisfies u(j)**H * A = lambda(j) * u(j)**H where u(j)**H denotes the conjugate transpose of u(j). The computed eigenvectors are normalized to have Euclidean norm equal to 1 and largest component real. Arguments ========= JOBVL (input) CHARACTER*1 = 'N': left eigenvectors of A are not computed; = 'V': left eigenvectors of are computed. JOBVR (input) CHARACTER*1 = 'N': right eigenvectors of A are not computed; = 'V': right eigenvectors of A are computed. N (input) INTEGER The order of the matrix A. N >= 0. A (input/output) COMPLEX*16 array, dimension (LDA,N) On entry, the N-by-N matrix A. On exit, A has been overwritten. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). W (output) COMPLEX*16 array, dimension (N) W contains the computed eigenvalues. VL (output) COMPLEX*16 array, dimension (LDVL,N) If JOBVL = 'V', the left eigenvectors u(j) are stored one after another in the columns of VL, in the same order as their eigenvalues. If JOBVL = 'N', VL is not referenced. u(j) = VL(:,j), the j-th column of VL. LDVL (input) INTEGER The leading dimension of the array VL. LDVL >= 1; if JOBVL = 'V', LDVL >= N. VR (output) COMPLEX*16 array, dimension (LDVR,N) If JOBVR = 'V', the right eigenvectors v(j) are stored one after another in the columns of VR, in the same order as their eigenvalues. If JOBVR = 'N', VR is not referenced. v(j) = VR(:,j), the j-th column of VR. LDVR (input) INTEGER The leading dimension of the array VR. LDVR >= 1; if JOBVR = 'V', LDVR >= N. WORK (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK)) On exit, if INFO = 0, WORK(1) returns the optimal LWORK. LWORK (input) INTEGER The dimension of the array WORK. LWORK >= (1+nb)*N. If LWORK = -1, then a workspace query is assumed; the routine only calculates the optimal size of the WORK array, returns this value as the first entry of the WORK array, and no error message related to LWORK is issued by XERBLA. RWORK (workspace) DOUBLE PRECISION array, dimension (2*N) INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value. > 0: if INFO = i, the QR algorithm failed to compute all the eigenvalues, and no eigenvectors have been computed; elements and i+1:N of W contain eigenvalues which have converged. ===================================================================== */ #define vl(i,j) (vl + (i) + (j)*ldvl) #define vr(i,j) (vr + (i) + (j)*ldvr) magma_int_t c_one = 1; magma_int_t c_zero = 0; double d__1, d__2; magmaDoubleComplex z__1, z__2; magmaDoubleComplex tmp; double scl; double dum[1], eps; double anrm, cscale, bignum, smlnum; magma_int_t i, k, ilo, ihi; magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb; magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1]; char side[2] = {0, 0}; char jobvl_[2] = {jobvl, 0}; char jobvr_[2] = {jobvr, 0}; irwork = 0; *info = 0; lquery = lwork == -1; wantvl = lapackf77_lsame( jobvl_, "V" ); wantvr = lapackf77_lsame( jobvr_, "V" ); if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) { *info = -1; } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) { *info = -2; } else if (n < 0) { *info = -3; } else if (lda < max(1,n)) { *info = -5; } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) { *info = -8; } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) { *info = -10; } /* Compute workspace */ nb = magma_get_zgehrd_nb( n ); if (*info == 0) { minwrk = (1+nb)*n; work[0] = MAGMA_Z_MAKE( minwrk, 0 ); if (lwork < minwrk && ! lquery) { *info = -12; } } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } else if (lquery) { return *info; } /* Quick return if possible */ if (n == 0) { return *info; } #if defined(Version3) || defined(Version4) || defined(Version5) magmaDoubleComplex *dT; if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) { *info = MAGMA_ERR_DEVICE_ALLOC; return *info; } #endif #if defined(Version4) || defined(Version5) magmaDoubleComplex *T; if (MAGMA_SUCCESS != magma_zmalloc_cpu( &T, nb*n )) { magma_free( dT ); *info = MAGMA_ERR_HOST_ALLOC; return *info; } #endif /* Get machine constants */ eps = lapackf77_dlamch( "P" ); smlnum = lapackf77_dlamch( "S" ); bignum = 1. / smlnum; lapackf77_dlabad( &smlnum, &bignum ); smlnum = magma_dsqrt( smlnum ) / eps; bignum = 1. / smlnum; /* Scale A if max element outside range [SMLNUM,BIGNUM] */ anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum ); scalea = 0; if (anrm > 0. && anrm < smlnum) { scalea = 1; cscale = smlnum; } else if (anrm > bignum) { scalea = 1; cscale = bignum; } if (scalea) { lapackf77_zlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr ); } /* Balance the matrix * (CWorkspace: none) * (RWorkspace: need N) */ ibal = 0; lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr ); /* Reduce to upper Hessenberg form * (CWorkspace: need 2*N, prefer N + N*NB) * (RWorkspace: none) */ itau = 0; iwrk = itau + n; liwrk = lwork - iwrk; #if defined(Version1) // Version 1 - LAPACK lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version2) // Version 2 - LAPACK consistent HRD magma_zgehrd2( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, magma_zgehrd( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, dT, &ierr ); #elif defined(Version4) || defined(Version5) // Version 4 - Multi-GPU, T on host magma_zgehrd_m( n, ilo, ihi, A, lda, &work[itau], &work[iwrk], liwrk, T, &ierr ); magma_zsetmatrix( nb, n, T, nb, dT, nb ); #endif if (wantvl) { /* Want left eigenvectors * Copy Householder vectors to VL */ side[0] = 'L'; lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl ); /* Generate unitary matrix in VL * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB) * (RWorkspace: none) */ #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_zunghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored magma_zunghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_zunghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VL * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W, vl, &ldvl, &work[iwrk], &liwrk, info ); if (wantvr) { /* Want left and right eigenvectors * Copy Schur vectors to VR */ side[0] = 'B'; lapackf77_zlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr ); } } else if (wantvr) { /* Want right eigenvectors * Copy Householder vectors to VR */ side[0] = 'R'; lapackf77_zlacpy( "L", &n, &n, A, &lda, vr, &ldvr ); /* Generate unitary matrix in VR * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB) * (RWorkspace: none) */ #if defined(Version1) || defined(Version2) // Version 1 & 2 - LAPACK lapackf77_zunghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau], &work[iwrk], &liwrk, &ierr ); #elif defined(Version3) || defined(Version4) // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored magma_zunghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr ); #elif defined(Version5) // Version 5 - Multi-GPU, T on host magma_zunghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr ); #endif /* Perform QR iteration, accumulating Schur vectors in VR * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W, vr, &ldvr, &work[iwrk], &liwrk, info ); } else { /* Compute eigenvalues only * (CWorkspace: need 1, prefer HSWORK (see comments) ) * (RWorkspace: none) */ iwrk = itau; liwrk = lwork - iwrk; lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, W, vr, &ldvr, &work[iwrk], &liwrk, info ); } /* If INFO > 0 from ZHSEQR, then quit */ if (*info > 0) { goto CLEANUP; } if (wantvl || wantvr) { /* Compute left and/or right eigenvectors * (CWorkspace: need 2*N) * (RWorkspace: need 2*N) */ irwork = ibal + n; lapackf77_ztrevc( side, "B", select, &n, A, &lda, vl, &ldvl, vr, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr ); } if (wantvl) { /* Undo balancing of left eigenvectors * (CWorkspace: none) * (RWorkspace: need N) */ lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, vl, &ldvl, &ierr ); /* Normalize left eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { scl = 1. / cblas_dznrm2( n, vl(0,i), 1 ); cblas_zdscal( n, scl, vl(0,i), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = MAGMA_Z_REAL( *vl(k,i) ); d__2 = MAGMA_Z_IMAG( *vl(k,i) ); rwork[irwork + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &rwork[irwork], 1 ); z__2 = MAGMA_Z_CNJG( *vl(k,i) ); d__1 = magma_dsqrt( rwork[irwork + k] ); MAGMA_Z_DSCALE( z__1, z__2, d__1 ); tmp = z__1; cblas_zscal( n, CBLAS_SADDR(tmp), vl(0,i), 1 ); d__1 = MAGMA_Z_REAL( *vl(k,i) ); z__1 = MAGMA_Z_MAKE( d__1, 0 ); *vl(k,i) = z__1; } } if (wantvr) { /* Undo balancing of right eigenvectors * (CWorkspace: none) * (RWorkspace: need N) */ lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, vr, &ldvr, &ierr ); /* Normalize right eigenvectors and make largest component real */ for (i = 0; i < n; ++i) { scl = 1. / cblas_dznrm2( n, vr(0,i), 1 ); cblas_zdscal( n, scl, vr(0,i), 1 ); for (k = 0; k < n; ++k) { /* Computing 2nd power */ d__1 = MAGMA_Z_REAL( *vr(k,i) ); d__2 = MAGMA_Z_IMAG( *vr(k,i) ); rwork[irwork + k] = d__1*d__1 + d__2*d__2; } k = cblas_idamax( n, &rwork[irwork], 1 ); z__2 = MAGMA_Z_CNJG( *vr(k,i) ); d__1 = magma_dsqrt( rwork[irwork + k] ); MAGMA_Z_DSCALE( z__1, z__2, d__1 ); tmp = z__1; cblas_zscal( n, CBLAS_SADDR(tmp), vr(0,i), 1 ); d__1 = MAGMA_Z_REAL( *vr(k,i) ); z__1 = MAGMA_Z_MAKE( d__1, 0 ); *vr(k,i) = z__1; } } CLEANUP: /* Undo scaling if necessary */ if (scalea) { i__1 = n - (*info); i__2 = max( n - (*info), 1 ); lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, W + (*info), &i__2, &ierr ); if (*info > 0) { i__1 = ilo - 1; lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one, W, &n, &ierr ); } } #if defined(Version3) || defined(Version4) || defined(Version5) magma_free( dT ); #endif #if defined(Version4) || defined(Version5) magma_free_cpu( T ); #endif return *info; } /* magma_zgeev */
/*! * Power method to compute the norm of the operator A. * * \retval bound upper bound on norm of the continuos * Fourier transform operator (double). * \param[in] A Pointer to the measurement operator. * \param[in] A_data Data structure associated to A. * \param[in] At Pointer to the the adjoint of the measurement operator. * \param[in] At_data Data structure associated to At. * * \authors Rafael Carrillo */ double purify_measurement_pow_meth(void (*A)(void *out, void *in, void **data), void **A_data, void (*At)(void *out, void *in, void **data), void **At_data) { int i, iter, nx, ny; int seedn = 51; double bound, norm, rel_ob; purify_measurement_cparam *param; complex double *y; complex double *x; //Cast input pointers param = (purify_measurement_cparam*)A_data[0]; nx = param->nx1*param->ny1; ny = param->nmeas; iter = 0; y = (complex double*)malloc((ny) * sizeof( complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(y); x = (complex double*)malloc((nx) * sizeof( complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(x); if (param->nmeas > nx){ for (i=0; i < nx; i++) { x[i] = purify_ran_gasdev2(seedn) + purify_ran_gasdev2(seedn)*I; } norm = cblas_dznrm2(nx, (void*)x, 1); for (i=0; i < nx; i++) { x[i] = x[i]/norm; } norm = 1.0; //main loop while (iter < 200){ A((void*)y, (void*)x, A_data); At((void*)x, (void*)y, At_data); bound = cblas_dznrm2(nx, (void*)x, 1); rel_ob = (bound - norm)/norm; if (rel_ob <= 0.001) break; norm = bound; for (i=0; i < nx; i++) { x[i] = x[i]/norm; } iter++; } } else{ for (i=0; i < ny; i++) { y[i] = purify_ran_gasdev2(seedn) + purify_ran_gasdev2(seedn)*I; } norm = cblas_dznrm2(ny, (void*)y, 1); for (i=0; i < ny; i++) { y[i] = y[i]/norm; } norm = 1.0; //main loop while (iter < 200){ At((void*)x, (void*)y, At_data); A((void*)y, (void*)x, A_data); bound = cblas_dznrm2(ny, (void*)y, 1); rel_ob = (bound - norm)/norm; if (rel_ob <= 0.001) break; norm = bound; for (i=0; i < ny; i++) { y[i] = y[i]/norm; } iter++; } } free(y); free(x); return bound; }
std::complex<double> HostVector<std::complex<double> >::Norm(void) const { return cblas_dznrm2(this->size_, this->vec_, 1); }
// // Overloaded function for dispatching to // * CBLAS backend, and // * complex<double> value-type. // inline double nrm2( const int n, const std::complex<double>* x, const int incx ) { return cblas_dznrm2( n, x, incx ); }
int main(int argc, char *argv[]) { int i, j, Nx, Ny, Nr, Nb; int seedn=54; double sigma; double a; double mse; double snr; double snr_out; double gamma=0.001; double aux1, aux2, aux3, aux4; complex double alpha; purify_image img, img_copy; purify_visibility_filetype filetype_vis; purify_image_filetype filetype_img; complex double *xinc; complex double *y0; complex double *y; complex double *noise; double *xout; double *w; double *error; complex double *xoutc; double *wdx; double *wdy; double *dummyr; complex double *dummyc; //parameters for the continuos Fourier Transform double *deconv; purify_sparsemat_row gmat; purify_visibility vis_test; purify_measurement_cparam param_m1; purify_measurement_cparam param_m2; complex double *fft_temp1; complex double *fft_temp2; void *datafwd[5]; void *dataadj[5]; fftw_plan planfwd; fftw_plan planadj; //Structures for sparsity operator sopt_wavelet_type *dict_types; sopt_wavelet_type *dict_types1; sopt_wavelet_type *dict_types2; sopt_sara_param param1; sopt_sara_param param2; sopt_sara_param param3; void *datas[1]; void *datas1[1]; void *datas2[1]; //Structures for the opmization problems sopt_l1_sdmmparam param4; sopt_l1_rwparam param5; sopt_prox_tvparam param6; sopt_tv_sdmmparam param7; sopt_tv_rwparam param8; clock_t start, stop; double t = 0.0; double start1, stop1; int dimy, dimx; //Image dimension of the zero padded image //Dimensions should be power of 2 dimx = 256; dimy = 256; //Define parameters filetype_vis = PURIFY_VISIBILITY_FILETYPE_PROFILE_VIS; filetype_img = PURIFY_IMAGE_FILETYPE_FITS; //Read coverage purify_visibility_readfile(&vis_test, "./data/images/Coverages/cont_sim4.vis", filetype_vis); printf("Number of visibilities: %i \n\n", vis_test.nmeas); // Input image. img.fov_x = 1.0 / 180.0 * PURIFY_PI; img.fov_y = 1.0 / 180.0 * PURIFY_PI; img.nx = 4; img.ny = 4; //Read input image purify_image_readfile(&img, "data/images/Einstein.fits", 1); printf("Image dimension: %i, %i \n\n", img.nx, img.ny); // purify_image_writefile(&img, "data/test/Einstein_double.fits", filetype_img); param_m1.nmeas = vis_test.nmeas; param_m1.ny1 = dimy; param_m1.nx1 = dimx; param_m1.ofy = 2; param_m1.ofx = 2; param_m1.ky = 2; param_m1.kx = 2; param_m2.nmeas = vis_test.nmeas; param_m2.ny1 = dimy; param_m2.nx1 = dimx; param_m2.ofy = 2; param_m2.ofx = 2; param_m2.ky = 2; param_m2.kx = 2; Nb = 9; Nx=param_m2.ny1*param_m2.nx1; Nr=Nb*Nx; Ny=param_m2.nmeas; //Memory allocation for the different variables deconv = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(deconv); xinc = (complex double*)malloc((Nx) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xinc); xout = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xout); y = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(y); y0 = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(y0); noise = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(noise); w = (double*)malloc((Nr) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(w); error = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(error); xoutc = (complex double*)malloc((Nx) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(xoutc); wdx = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(wdx); wdy = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(wdy); dummyr = malloc(Nr * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(dummyr); dummyc = malloc(Nr * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(dummyc); for (i=0; i < Nx; i++){ xinc[i] = 0.0 + 0.0*I; } for (i=0; i < img.nx; i++){ for (j=0; j < img.ny; j++){ xinc[i+j*param_m1.nx1] = img.pix[i+j*img.nx] + 0.0*I; } } //Initialize griding matrix assert((start = clock())!=-1); purify_measurement_init_cft(&gmat, deconv, vis_test.u, vis_test.v, ¶m_m1); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time initalization: %f \n\n", t); for(i = 0; i < img.nx * img.ny; ++i){ deconv[i] = 1.0; } //Memory allocation for the fft i = Nx*param_m1.ofy*param_m1.ofx; fft_temp1 = (complex double*)malloc((i) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp1); fft_temp2 = (complex double*)malloc((i) * sizeof(complex double)); PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp2); //FFT plan planfwd = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, fft_temp1, fft_temp1, FFTW_FORWARD, FFTW_MEASURE); planadj = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, fft_temp2, fft_temp2, FFTW_BACKWARD, FFTW_MEASURE); datafwd[0] = (void*)¶m_m1; datafwd[1] = (void*)deconv; datafwd[2] = (void*)&gmat; datafwd[3] = (void*)&planfwd; datafwd[4] = (void*)fft_temp1; dataadj[0] = (void*)¶m_m2; dataadj[1] = (void*)deconv; dataadj[2] = (void*)&gmat; dataadj[3] = (void*)&planadj; dataadj[4] = (void*)fft_temp2; printf("FFT plan done \n\n"); assert((start = clock())!=-1); purify_measurement_cftfwd((void*)y0, (void*)xinc, datafwd); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time forward operator: %f \n\n", t); //Noise realization //Input snr snr = 30.0; a = cblas_dznrm2(Ny, (void*)y0, 1); sigma = a*pow(10.0,-(snr/20.0))/sqrt(Ny); FILE *fout = fopen("ein.uv", "w"); for (i=0; i < Ny; i++) { // noise[i] = (sopt_ran_gasdev2(seedn) + sopt_ran_gasdev2(seedn)*I)*(sigma/sqrt(2)); noise[i] = 0; y[i] = y0[i] + noise[i]; fprintf(fout, "%14.5e%14.5e%14.5e%14.5e%14.5e%14.5e\n", vis_test.u[i], vis_test.v[i], vis_test.w[i], creal(y[i]), cimag(y[i]), 1.0); } fclose(fout); //Rescaling the measurements aux4 = (double)Ny/(double)Nx; for (i=0; i < Ny; i++) { y[i] = y[i]/sqrt(aux4); } for (i=0; i < Nx; i++) { deconv[i] = deconv[i]/sqrt(aux4); } // Output image. img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI; img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI; img_copy.nx = param_m1.nx1; img_copy.ny = param_m1.ny1; for (i=0; i < Nx; i++){ xoutc[i] = 0.0 + 0.0*I; } //Dirty image purify_measurement_cftadj((void*)xoutc, (void*)y, dataadj); for (i=0; i < Nx; i++) { xout[i] = creal(xoutc[i]); } aux1 = purify_utils_maxarray(xout, Nx); img_copy.pix = (double*)malloc((Nx) * sizeof(double)); PURIFY_ERROR_MEM_ALLOC_CHECK(img_copy.pix); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "eindirty.fits", filetype_img); return 0; //SARA structure initialization param1.ndict = Nb; param1.real = 0; dict_types = malloc(param1.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types); dict_types[0] = SOPT_WAVELET_DB1; dict_types[1] = SOPT_WAVELET_DB2; dict_types[2] = SOPT_WAVELET_DB3; dict_types[3] = SOPT_WAVELET_DB4; dict_types[4] = SOPT_WAVELET_DB5; dict_types[5] = SOPT_WAVELET_DB6; dict_types[6] = SOPT_WAVELET_DB7; dict_types[7] = SOPT_WAVELET_DB8; dict_types[8] = SOPT_WAVELET_Dirac; sopt_sara_initop(¶m1, param_m1.ny1, param_m1.nx1, 4, dict_types); datas[0] = (void*)¶m1; //Db8 structure initialization param2.ndict = 1; param2.real = 0; dict_types1 = malloc(param2.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types1); dict_types1[0] = SOPT_WAVELET_DB8; sopt_sara_initop(¶m2, param_m1.ny1, param_m1.nx1, 4, dict_types1); datas1[0] = (void*)¶m2; //Dirac structure initialization param3.ndict = 1; param3.real = 0; dict_types2 = malloc(param3.ndict * sizeof(sopt_wavelet_type)); PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types2); dict_types2[0] = SOPT_WAVELET_Dirac; sopt_sara_initop(¶m3, param_m1.ny1, param_m1.nx1, 4, dict_types2); datas2[0] = (void*)¶m3; //Scaling constants in the diferent representation domains sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas); for (i=0; i < Nr; i++) { dummyr[i] = creal(dummyc[i]); } aux2 = purify_utils_maxarray(dummyr, Nr); sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas1); for (i=0; i < Nr; i++) { dummyr[i] = creal(dummyc[i]); } aux3 = purify_utils_maxarray(dummyr, Nx); // Output image. img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI; img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI; img_copy.nx = param_m1.nx1; img_copy.ny = param_m1.ny1; //Initial solution and weights for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; wdx[i] = 1.0; wdy[i] = 1.0; } for (i=0; i < Nr; i++){ w[i] = 1.0; } //Copy true image in xout for (i=0; i < Nx; i++) { xout[i] = creal(xinc[i]); } printf("**********************\n"); printf("BPSA reconstruction\n"); printf("**********************\n"); //Structure for the L1 solver param4.verbose = 2; param4.max_iter = 300; param4.gamma = gamma*aux2; param4.rel_obj = 0.001; param4.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4); param4.epsilon_tol = 0.01; param4.real_data = 0; param4.cg_max_iter = 100; param4.cg_tol = 0.000001; //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } #ifdef _OPENMP start1 = omp_get_wtime(); #else assert((start = clock())!=-1); #endif sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas, &sopt_sara_analysisop, datas, Nr, (void*)y, Ny, w, param4); #ifdef _OPENMP stop1 = omp_get_wtime(); t = stop1 - start1; #else stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; #endif printf("Time BPSA: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "./data/test/einbpsa.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einbpsares.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einbpsaerror.fits", filetype_img); printf("**********************\n"); printf("SARA reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nr); param5.init_sol = 1; #ifdef _OPENMP start1 = omp_get_wtime(); #else assert((start = clock())!=-1); #endif sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas, &sopt_sara_analysisop, datas, Nr, (void*)y, Ny, param4, param5); #ifdef _OPENMP stop1 = omp_get_wtime(); t = stop1 - start1; #else stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; #endif printf("Time SARA: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einsara.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einsarares.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einsaraerror.fits", filetype_img); printf("**********************\n"); printf("TV reconstruction\n"); printf("**********************\n"); //Structure for the TV prox param6.verbose = 1; param6.max_iter = 50; param6.rel_obj = 0.0001; //Structure for the TV solver param7.verbose = 2; param7.max_iter = 300; param7.gamma = gamma*aux1; param7.rel_obj = 0.001; param7.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4); param7.epsilon_tol = 0.01; param7.real_data = 0; param7.cg_max_iter = 100; param7.cg_tol = 0.000001; param7.paramtv = param6; //Initial solution and weights for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; wdx[i] = 1.0; wdy[i] = 1.0; } assert((start = clock())!=-1); sopt_tv_sdmm((void*)xoutc, dimx, dimy, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, (void*)y, Ny, wdx, wdy, param7); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time TV: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/eintv.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/eintvres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/eintverror.fits", filetype_img); printf("**********************\n"); printf("RWTV reconstruction\n"); printf("**********************\n"); //Structure for the RWTV solver param8.verbose = 2; param8.max_iter = 5; param8.rel_var = 0.001; param8.sigma = sigma*sqrt(Ny/(2*Nx)); param8.init_sol = 1; assert((start = clock())!=-1); sopt_tv_rwsdmm((void*)xoutc, dimx, dimy, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, (void*)y, Ny, param7, param8); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWTV: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwtv.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwtvres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwtverror.fits", filetype_img); printf("**********************\n"); printf("Db8 reconstruction\n"); printf("**********************\n"); //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } param4.gamma = gamma*aux3; assert((start = clock())!=-1); sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas1, &sopt_sara_analysisop, datas1, Nx, (void*)y, Ny, w, param4); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time BPDb8: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/eindb8.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/eindb8res.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/eindb8error.fits", filetype_img); printf("**********************\n"); printf("RWBPDb8 reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nx); param5.init_sol = 1; assert((start = clock())!=-1); sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas1, &sopt_sara_analysisop, datas1, Nx, (void*)y, Ny, param4, param5); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWBPDb8: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwdb8.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwdb8res.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwdb8error.fits", filetype_img); printf("**********************\n"); printf("BP reconstruction\n"); printf("**********************\n"); param4.gamma = gamma*aux1; //Initial solution for (i=0; i < Nx; i++) { xoutc[i] = 0.0 + 0.0*I; } assert((start = clock())!=-1); sopt_l1_sdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas2, &sopt_sara_analysisop, datas2, Nx, (void*)y, Ny, w, param4); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time BP: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einbp.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einbpres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einbperror.fits", filetype_img); printf("**********************\n"); printf("RWBP reconstruction\n"); printf("**********************\n"); //Structure for the RWL1 solver param5.verbose = 2; param5.max_iter = 5; param5.rel_var = 0.001; param5.sigma = sigma*sqrt((double)Ny/(double)Nx); param5.init_sol = 1; assert((start = clock())!=-1); sopt_l1_rwsdmm((void*)xoutc, Nx, &purify_measurement_cftfwd, datafwd, &purify_measurement_cftadj, dataadj, &sopt_sara_synthesisop, datas2, &sopt_sara_analysisop, datas2, Nx, (void*)y, Ny, param4, param5); stop = clock(); t = (double) (stop-start)/CLOCKS_PER_SEC; printf("Time RWBP: %f \n\n", t); //SNR for (i=0; i < Nx; i++) { error[i] = creal(xoutc[i])-xout[i]; } mse = cblas_dnrm2(Nx, error, 1); a = cblas_dnrm2(Nx, xout, 1); snr_out = 20.0*log10(a/mse); printf("SNR: %f dB\n\n", snr_out); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xoutc[i]); } purify_image_writefile(&img_copy, "data/test/einrwbp.fits", filetype_img); //Residual image purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd); alpha = -1.0 +0.0*I; cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1); purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj); for (i=0; i < Nx; i++){ img_copy.pix[i] = creal(xinc[i]); } purify_image_writefile(&img_copy, "data/test/einrwbpres.fits", filetype_img); //Error image for (i=0; i < Nx; i++){ img_copy.pix[i] = error[i]; } purify_image_writefile(&img_copy, "data/test/einrwbperror.fits", filetype_img); //Free all memory purify_image_free(&img); purify_image_free(&img_copy); free(deconv); purify_visibility_free(&vis_test); free(y); free(xinc); free(xout); free(w); free(noise); free(y0); free(error); free(xoutc); free(wdx); free(wdy); sopt_sara_free(¶m1); sopt_sara_free(¶m2); sopt_sara_free(¶m3); free(dict_types); free(dict_types1); free(dict_types2); free(fft_temp1); free(fft_temp2); fftw_destroy_plan(planfwd); fftw_destroy_plan(planadj); purify_sparsemat_freer(&gmat); free(dummyr); free(dummyc); return 0; }