Esempio n. 1
0
void eblas_dznrm2_sub(size_t iStart, size_t iStop, const complex* x, int incx, double* ret, std::mutex* lock)
{	//Compute this thread's contribution:
	double retSub = cblas_dznrm2(iStop-iStart, x+incx*iStart, incx);
	//Accumulate over threads (need sync):
	lock->lock();
	*ret += retSub*retSub;
	lock->unlock();
}
Esempio n. 2
0
double eblas_dznrm2(int N, const complex* x, int incx)
{	
	#ifdef MKL_PROVIDES_BLAS
	return cblas_dznrm2(N, x, incx);
	#else
	double ret = 0.;
	std::mutex lock;
	threadLaunch((N<100000) ? 1 : 0, eblas_dznrm2_sub, N, x, incx, &ret, &lock);
	return sqrt(ret);
	#endif
}
Esempio n. 3
0
VALUE rb_blas_xnrm2(int argc, VALUE *argv, VALUE self)
{
  Matrix *dx;
  int incx;
  int incy;
  int n;
  //char error_msg[64];
  VALUE n_value,  incx_value;
  
  rb_scan_args(argc, argv, "02",   &incx_value, &n_value);
  
  Data_Get_Struct(self, Matrix, dx);

  if(incx_value == Qnil)
    incx = 1;
  else
    incx = NUM2INT(incx_value);
  
  if(n_value == Qnil)
    n = dx->nrows;
  else
    n = NUM2INT(n_value);

  if(dx == NULL || dx->ncols != 1)
  { //sprintf(error_msg, "Self is not a Vector");
    rb_raise(rb_eRuntimeError, "Self is not a Vector");
  }
  
  switch(dx->data_type)
  {
  case Single_t: //s
    return rb_float_new(cblas_snrm2(n , (float *)dx->data, incx)); 
  case Double_t: //d
    return rb_float_new(cblas_dnrm2(n , (double *)dx->data, incx)); 
  case Complex_t: //c
    return rb_float_new(cblas_scnrm2(n , dx->data, incx)); 
  case Double_Complex_t: //z
    return rb_float_new(cblas_dznrm2(n , dx->data, incx)); 
  default:
    //sprintf(error_msg, "Invalid data_type (%d) in Matrix", dx->data_type);
    rb_raise(rb_eRuntimeError, "Invalid data_type (%d) in Matrix", dx->data_type);
    return Qnil; //Never reaches here.
  }
}
Esempio n. 4
0
// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;
    
    magmaDoubleComplex  *A, *B;
    double diff, error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld;
    magmaDoubleComplex x2_m, x2_c;  // complex x for magma, cblas/fortran blas respectively
    double x_m, x_c;  // x for magma, cblas/fortran blas respectively
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    opts.tolerance = max( 100., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    gTol = tol;
    
    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfault), which !!\n"
            "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA.  !!\n"
            "\n"
            "Diff  compares MAGMA wrapper        to CBLAS and BLAS function; should be exactly 0.\n"
            "Error compares MAGMA implementation to CBLAS and BLAS function; should be ~ machine epsilon.\n"
            "\n" );
    
    double total_diff  = 0.;
    double total_error = 0.;
    int inc[] = { 1 };  //{ -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    int ninc = sizeof(inc)/sizeof(*inc);
    
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];
        
    for( int iincx = 0; iincx < ninc; ++iincx ) {
        magma_int_t incx = inc[iincx];
        
    for( int iincy = 0; iincy < ninc; ++iincy ) {
        magma_int_t incy = inc[iincy];
        
        printf("=========================================================================\n");
        printf( "m=%d, n=%d, k=%d, incx = %d, incy = %d\n",
                (int) m, (int) n, (int) k, (int) incx, (int) incy );
        printf( "Function              MAGMA     CBLAS     BLAS        Diff      Error\n"
                "                      msec      msec      msec\n" );
        
        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * max( abs(incx), abs(incy) );
        ld = max( 1, maxn );
        size = ld*maxn;
        magma_zmalloc_pinned( &A,  size );  assert( A   != NULL );
        magma_zmalloc_pinned( &B,  size );  assert( B   != NULL );
        
        // initialize matrices
        lapackf77_zlarnv( &ione, ISEED, &size, A );
        lapackf77_zlarnv( &ione, ISEED, &size, B );
        
        printf( "Level 1 BLAS ----------------------------------------------------------\n" );
        
        
        // ----- test DZASUM
        // get one-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dzasum( m, A(0,j), incx );
                
                x_c = cblas_dzasum( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dzasum( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dzasum", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test DZNRM2
        // get two-norm of column j of A
        if ( incx > 0 && incx == incy ) {  // positive, no incy
            diff  = 0;
            error = 0;
            for( int j = 0; j < k; ++j ) {
                x_m = magma_cblas_dznrm2( m, A(0,j), incx );
                
                x_c = cblas_dznrm2( m, A(0,j), incx );
                diff += fabs( x_m - x_c );
                
                x_c = blasf77_dznrm2( &m, A(0,j), &incx );
                error += fabs( (x_m - x_c) / (m*x_c) );
            }
            output( "dznrm2", diff, error );
            total_diff  += diff;
            total_error += error;
        }
        
        // ----- test ZDOTC
        // dot columns, Aj^H Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "zdotc", diff, error );
        total_diff  += diff;
        total_error += error;
        total_error += error;
        
        // ----- test ZDOTU
        // dot columns, Aj^T * Bj
        diff  = 0;
        error = 0;
        for( int j = 0; j < k; ++j ) {
            // MAGMA implementation, not just wrapper
            x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
            
            // crashes on MKL 11.1.2, ILP64
            #if ! defined( MAGMA_WITH_MKL )
                #ifdef COMPLEX
                cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                #else
                x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
                #endif
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
            
            // crashes on MacOS 10.9
            #if ! defined( __APPLE__ )
                x2_c = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy );
                error += fabs( x2_m - x2_c ) / fabs( m*x2_c );
            #endif
        }
        output( "zdotu", diff, error );
        total_diff  += diff;
        total_error += error;
        
        // tell user about disabled functions
        #if defined( MAGMA_WITH_MKL )
            printf( "cblas_zdotc and cblas_zdotu disabled with MKL (segfaults)\n" );
        #endif
        
        #if defined( __APPLE__ )
            printf( "blasf77_zdotc and blasf77_zdotu disabled on MacOS (segfaults)\n" );
        #endif
            
        // cleanup
        magma_free_pinned( A );
        magma_free_pinned( B );
        fflush( stdout );
    }}}  // itest, incx, incy
    
    // TODO use average error?
    printf( "sum diffs  = %8.2g, MAGMA wrapper        compared to CBLAS and Fortran BLAS; should be exactly 0.\n"
            "sum errors = %8.2e, MAGMA implementation compared to CBLAS and Fortran BLAS; should be ~ machine epsilon.\n\n",
            total_diff, total_error );
    if ( total_diff != 0. ) {
        printf( "some tests failed diff == 0.; see above.\n" );
    }
    else {
        printf( "all tests passed diff == 0.\n" );
    }
    
    TESTING_FINALIZE();
    
    int status = (total_diff != 0.);
    return status;
}
Esempio n. 5
0
// ----------------------------------------
int main( int argc, char** argv )
{
    TESTING_INIT();
    
    //real_Double_t   t_m, t_c, t_f;
    magma_int_t ione = 1;
    
    magmaDoubleComplex  *A, *B;
    double error_cblas, error_fblas, error_inline;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t i, j, k, m, n, size, maxn, ld;
    
    // complex x for magma, cblas, fortran, inline blas respectively
    magmaDoubleComplex x2_m, x2_c, x2_f, x2_i;
    
    // real    x for magma, cblas, fortran, inline blas respectively
    double x_m, x_c, x_f, x_i;
    
    MAGMA_UNUSED( x_c  );
    MAGMA_UNUSED( x_f  );
    MAGMA_UNUSED( x2_c );
    MAGMA_UNUSED( x2_f );
    MAGMA_UNUSED( x2_m );
    
    magma_opts opts;
    opts.parse_opts( argc, argv );
    
    opts.tolerance = max( 100., opts.tolerance );
    double tol = opts.tolerance * lapackf77_dlamch("E");
    gTol = tol;
    
    magma_int_t inc[] = { -2, -1, 1, 2 };  //{ 1 };  //{ -1, 1 };
    magma_int_t ninc = sizeof(inc)/sizeof(*inc);
    magma_int_t maxinc = 0;
    for( i=0; i < ninc; ++i ) {
        maxinc = max( maxinc, abs(inc[i]) );
    }
    
    printf( "!! Calling these CBLAS and Fortran BLAS sometimes crashes (segfaults), which !!\n"
            "!! is why we use wrappers. It does not necesarily indicate a bug in MAGMA.   !!\n"
            "!! If MAGMA_WITH_MKL or __APPLE__ are defined, known failures are skipped.   !!\n"
            "\n" );
    
    // tell user about disabled functions
    #ifndef HAVE_CBLAS
        printf( "n/a: HAVE_CBLAS not defined, so no cblas functions tested.\n\n" );
    #endif
    
    #if defined(MAGMA_WITH_MKL)
        printf( "n/a: cblas_zdotc, cblas_zdotu, blasf77_zdotc, and blasf77_zdotu are disabled with MKL, due to segfaults.\n\n" );
    #endif
    
    #if defined(__APPLE__)
        printf( "n/a: blasf77_zdotc and blasf77_zdotu are disabled on MacOS, due to segfaults.\n\n" );
    #endif
    
    printf( "%%                                          Error w.r.t.   Error w.r.t.   Error w.r.t.\n"
            "%%   M     N     K  incx  incy   Function   CBLAS          Fortran BLAS   inline\n"
            "%%====================================================================================\n" );
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        if ( itest > 0 ) {
            printf( "%%----------------------------------------------------------------------\n" );
        }
        
        m = opts.msize[itest];
        n = opts.nsize[itest];
        k = opts.ksize[itest];
        
        // allocate matrices
        // over-allocate so they can be any combination of
        // {m,n,k} * {abs(incx), abs(incy)} by
        // {m,n,k} * {abs(incx), abs(incy)}
        maxn = max( max( m, n ), k ) * maxinc;
        ld = max( 1, maxn );
        size = ld*maxn;
        TESTING_MALLOC_CPU( A, magmaDoubleComplex, size );
        TESTING_MALLOC_CPU( B, magmaDoubleComplex, size );
        
        // initialize matrices
        lapackf77_zlarnv( &ione, ISEED, &size, A );
        lapackf77_zlarnv( &ione, ISEED, &size, B );
        
        // ----- test DZASUM
        for( int iincx = 0; iincx < ninc; ++iincx ) {
            magma_int_t incx = inc[iincx];
            
            for( int iincy = 0; iincy < ninc; ++iincy ) {
                magma_int_t incy = inc[iincy];
                
                // get one-norm of column j of A
                if ( incx > 0 && incx == incy ) {  // positive, no incy
                    error_cblas  = 0;
                    error_fblas  = 0;
                    error_inline = 0;
                    for( j=0; j < k; ++j ) {
                        x_m = magma_cblas_dzasum( m, A(0,j), incx );
                        
                        #ifdef HAVE_CBLAS
                            x_c = cblas_dzasum( m, A(0,j), incx );
                            error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) );
                        #else
                            x_c = 0;
                            error_cblas = SKIPPED_FLAG;
                        #endif
                        
                        x_f = blasf77_dzasum( &m, A(0,j), &incx );
                        error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) );
                        
                        // inline implementation
                        x_i = 0;
                        for( i=0; i < m; ++i ) {
                            x_i += MAGMA_Z_ABS1( *A(i*incx,j) );  // |real(Aij)| + |imag(Aij)|
                        }
                        error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) );
                        
                        //printf( "dzasum xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i );
                    }
                    output( "dzasum", m, n, k, incx, incy, error_cblas, error_fblas, error_inline );
                }
            }
        }
        printf( "\n" );
        
        // ----- test DZNRM2
        // get two-norm of column j of A
        for( int iincx = 0; iincx < ninc; ++iincx ) {
            magma_int_t incx = inc[iincx];
            
            for( int iincy = 0; iincy < ninc; ++iincy ) {
                magma_int_t incy = inc[iincy];
                
                if ( incx > 0 && incx == incy ) {  // positive, no incy
                    error_cblas  = 0;
                    error_fblas  = 0;
                    error_inline = 0;
                    for( j=0; j < k; ++j ) {
                        x_m = magma_cblas_dznrm2( m, A(0,j), incx );
                        
                        #ifdef HAVE_CBLAS
                            x_c = cblas_dznrm2( m, A(0,j), incx );
                            error_cblas = max( error_cblas, fabs(x_m - x_c) / fabs(m*x_c) );
                        #else
                            x_c = 0;
                            error_cblas = SKIPPED_FLAG;
                        #endif
                        
                        x_f = blasf77_dznrm2( &m, A(0,j), &incx );
                        error_fblas = max( error_fblas, fabs(x_m - x_f) / fabs(m*x_f) );
                        
                        // inline implementation (poor -- doesn't scale)
                        x_i = 0;
                        for( i=0; i < m; ++i ) {
                            x_i += real( *A(i*incx,j) ) * real( *A(i*incx,j) )
                                +  imag( *A(i*incx,j) ) * imag( *A(i*incx,j) );
                            // same: real( conj( *A(i*incx,j) ) * *A(i*incx,j) );
                        }
                        x_i = sqrt( x_i );
                        error_inline = max( error_inline, fabs(x_m - x_i) / fabs(m*x_i) );
                        
                        //printf( "dznrm2 xm %.8e, xc %.8e, xf %.8e, xi %.8e\n", x_m, x_c, x_f, x_i );
                    }
                    output( "dznrm2", m, n, k, incx, incy, error_cblas, error_fblas, error_inline );
                }
            }
        }
        printf( "\n" );
        
        // ----- test ZDOTC
        // dot columns, Aj^H Bj
        for( int iincx = 0; iincx < ninc; ++iincx ) {
            magma_int_t incx = inc[iincx];
            
            for( int iincy = 0; iincy < ninc; ++iincy ) {
                magma_int_t incy = inc[iincy];
                
                error_cblas  = 0;
                error_fblas  = 0;
                error_inline = 0;
                for( j=0; j < k; ++j ) {
                    // MAGMA implementation, not just wrapper
                    x2_m = magma_cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
                    
                    // crashes with MKL 11.1.2, ILP64
                    #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL)
                        #ifdef COMPLEX
                        cblas_zdotc_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                        #else
                        x2_c = cblas_zdotc( m, A(0,j), incx, B(0,j), incy );
                        #endif
                        error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) );
                    #else
                        x2_c = MAGMA_Z_ZERO;
                        error_cblas = SKIPPED_FLAG;
                    #endif
                    
                    // crashes with MKL 11.2.3 and MacOS 10.9
                    #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__)
                        x2_f = blasf77_zdotc( &m, A(0,j), &incx, B(0,j), &incy );
                        error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) );
                    #else
                        x2_f = MAGMA_Z_ZERO;
                        error_fblas = SKIPPED_FLAG;
                    #endif
                    
                    // inline implementation
                    x2_i = MAGMA_Z_ZERO;
                    magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx);
                    magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy);
                    for( i=0; i < m; ++i ) {
                        x2_i += conj( *A(A_offset + i*incx,j) ) * *B(B_offset + i*incy,j);
                    }
                    error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) );
                    
                    //printf( "zdotc xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n",
                    //        real(x2_m), imag(x2_m),
                    //        real(x2_c), imag(x2_c),
                    //        real(x2_f), imag(x2_f),
                    //        real(x2_i), imag(x2_i) );
                }
                output( "zdotc", m, n, k, incx, incy, error_cblas, error_fblas, error_inline );
            }
        }
        printf( "\n" );
        
        // ----- test ZDOTU
        // dot columns, Aj^T * Bj
        for( int iincx = 0; iincx < ninc; ++iincx ) {
            magma_int_t incx = inc[iincx];
            
            for( int iincy = 0; iincy < ninc; ++iincy ) {
                magma_int_t incy = inc[iincy];
                
                error_cblas  = 0;
                error_fblas  = 0;
                error_inline = 0;
                for( j=0; j < k; ++j ) {
                    // MAGMA implementation, not just wrapper
                    x2_m = magma_cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
                    
                    // crashes with MKL 11.1.2, ILP64
                    #if defined(HAVE_CBLAS) && ! defined(MAGMA_WITH_MKL)
                        #ifdef COMPLEX
                        cblas_zdotu_sub( m, A(0,j), incx, B(0,j), incy, &x2_c );
                        #else
                        x2_c = cblas_zdotu( m, A(0,j), incx, B(0,j), incy );
                        #endif
                        error_cblas = max( error_cblas, fabs(x2_m - x2_c) / fabs(m*x2_c) );
                    #else
                        x2_c = MAGMA_Z_ZERO;
                        error_cblas = SKIPPED_FLAG;
                    #endif
                    
                    // crashes with MKL 11.2.3 and MacOS 10.9
                    #if (! defined(COMPLEX) || ! defined(MAGMA_WITH_MKL)) && ! defined(__APPLE__)
                        x2_f = blasf77_zdotu( &m, A(0,j), &incx, B(0,j), &incy );
                        error_fblas = max( error_fblas, fabs(x2_m - x2_f) / fabs(m*x2_f) );
                    #else
                        x2_f = MAGMA_Z_ZERO;
                        error_fblas = SKIPPED_FLAG;
                    #endif
                    
                    // inline implementation
                    x2_i = MAGMA_Z_ZERO;
                    magma_int_t A_offset = (incx > 0 ? 0 : (-n + 1)*incx);
                    magma_int_t B_offset = (incy > 0 ? 0 : (-n + 1)*incy);
                    for( i=0; i < m; ++i ) {
                        x2_i += *A(A_offset + i*incx,j) * *B(B_offset + i*incy,j);
                    }
                    error_inline = max( error_inline, fabs(x2_m - x2_i) / fabs(m*x2_i) );
                    
                    //printf( "zdotu xm %.8e + %.8ei, xc %.8e + %.8ei, xf %.8e + %.8ei, xi %.8e + %.8ei\n",
                    //        real(x2_m), imag(x2_m),
                    //        real(x2_c), imag(x2_c),
                    //        real(x2_f), imag(x2_f),
                    //        real(x2_i), imag(x2_i) );
                }
                output( "zdotu", m, n, k, incx, incy, error_cblas, error_fblas, error_inline );
            }
        }
        
        // cleanup
        TESTING_FREE_CPU( A );
        TESTING_FREE_CPU( B );
        fflush( stdout );
    }  // itest, incx, incy
    
    opts.cleanup();
    TESTING_FINALIZE();
    return gStatus;
}
extern "C" magma_int_t
magma_zgeev(magma_vec_t jobvl, magma_vec_t jobvr, magma_int_t n,
            magmaDoubleComplex *a, magma_int_t lda,
            magmaDoubleComplex *geev_w_array,
            magmaDoubleComplex *vl, magma_int_t ldvl,
            magmaDoubleComplex *vr, magma_int_t ldvr,
            magmaDoubleComplex *work, magma_int_t lwork,
            double *rwork, magma_int_t *info, magma_queue_t queue)
{
/*  -- clMAGMA (version 1.0.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       September 2012

    Purpose   
    =======   
    ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the   
    eigenvalues and, optionally, the left and/or right eigenvectors.   

    The right eigenvector v(j) of A satisfies   
                     A * v(j) = lambda(j) * v(j)   
    where lambda(j) is its eigenvalue.   
    The left eigenvector u(j) of A satisfies   
                  u(j)**H * A = lambda(j) * u(j)**H   
    where u(j)**H denotes the conjugate transpose of u(j).   

    The computed eigenvectors are normalized to have Euclidean norm   
    equal to 1 and largest component real.   

    Arguments   
    =========   
    JOBVL   (input) CHARACTER*1   
            = 'N': left eigenvectors of A are not computed;   
            = 'V': left eigenvectors of are computed.   

    JOBVR   (input) CHARACTER*1   
            = 'N': right eigenvectors of A are not computed;   
            = 'V': right eigenvectors of A are computed.   

    N       (input) INTEGER   
            The order of the matrix A. N >= 0.   

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)   
            On entry, the N-by-N matrix A.   
            On exit, A has been overwritten.   

    LDA     (input) INTEGER   
            The leading dimension of the array A.  LDA >= max(1,N).   

    W       (output) COMPLEX*16 array, dimension (N)   
            W contains the computed eigenvalues.   

    VL      (output) COMPLEX*16 array, dimension (LDVL,N)   
            If JOBVL = 'V', the left eigenvectors u(j) are stored one   
            after another in the columns of VL, in the same order   
            as their eigenvalues.   
            If JOBVL = 'N', VL is not referenced.   
            u(j) = VL(:,j), the j-th column of VL.   

    LDVL    (input) INTEGER   
            The leading dimension of the array VL.  LDVL >= 1; if   
            JOBVL = 'V', LDVL >= N.   

    VR      (output) COMPLEX*16 array, dimension (LDVR,N)   
            If JOBVR = 'V', the right eigenvectors v(j) are stored one   
            after another in the columns of VR, in the same order   
            as their eigenvalues.   
            If JOBVR = 'N', VR is not referenced.   
            v(j) = VR(:,j), the j-th column of VR.   

    LDVR    (input) INTEGER   
            The leading dimension of the array VR.  LDVR >= 1; if   
            JOBVR = 'V', LDVR >= N.   

    WORK    (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))   
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.   

    LWORK   (input) INTEGER   
            The dimension of the array WORK.  LWORK >= (1+nb)*N.   

            If LWORK = -1, then a workspace query is assumed; the routine   
            only calculates the optimal size of the WORK array, returns   
            this value as the first entry of the WORK array, and no error   
            message related to LWORK is issued by XERBLA.   

    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)   

    INFO    (output) INTEGER   
            = 0:  successful exit   
            < 0:  if INFO = -i, the i-th argument had an illegal value.   
            > 0:  if INFO = i, the QR algorithm failed to compute all the   
                  eigenvalues, and no eigenvectors have been computed;   
                  elements and i+1:N of W contain eigenvalues which have   
                  converged.   
    =====================================================================    */

    magma_int_t c__1 = 1;
    magma_int_t c__0 = 0;
    
    magma_int_t a_dim1, a_offset, vl_dim1, vl_offset, vr_dim1, vr_offset, i__1, 
            i__2, i__3;
    double d__1, d__2;
    magmaDoubleComplex z__1, z__2;

    magma_int_t i__, k, ihi;
    double scl;
    magma_int_t ilo;
    double dum[1], eps;
    magmaDoubleComplex tmp;
    magma_int_t ibal;
    double anrm;
    magma_int_t ierr, itau, iwrk, nout;
    magma_int_t scalea;
    double cscale;
    magma_int_t select[1];
    double bignum;
    magma_int_t minwrk;
    magma_int_t wantvl;
    double smlnum;
    magma_int_t irwork;
    magma_int_t lquery, wantvr;
    magma_int_t nb = 0;
    magmaDoubleComplex_ptr dT;

    //magma_timestr_t start, end;

    char side[2] = {0, 0};
    magma_vec_t jobvl_ = jobvl;
    magma_vec_t jobvr_ = jobvr;

    *info = 0;
    lquery = lwork == -1;
    wantvl = lapackf77_lsame(lapack_const(jobvl_), "V");
    wantvr = lapackf77_lsame(lapack_const(jobvr_), "V");
    if (! wantvl && ! lapackf77_lsame(lapack_const(jobvl_), "N")) {
        *info = -1;
    } else if (! wantvr && ! lapackf77_lsame(lapack_const(jobvr_), "N")) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /*  Compute workspace   */
    if (*info == 0) {
        nb = magma_get_zgehrd_nb(n);
        minwrk = (1+nb)*n;
        work[0] = MAGMA_Z_MAKE((double) minwrk, 0.);

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }   

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
   
    // if eigenvectors are needed
#if defined(VERSION3)
    if (MAGMA_SUCCESS != magma_malloc(&dT, nb*n*sizeof(magmaDoubleComplex) )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
#endif

    a_dim1 = lda;
    a_offset = 1 + a_dim1;
    a -= a_offset;
    vl_dim1 = ldvl;
    vl_offset = 1 + vl_dim1;
    vl -= vl_offset;
    vr_dim1 = ldvr;
    vr_offset = 1 + vr_dim1;
    vr -= vr_offset;
    --work;
    --rwork;

    /* Get machine constants */
    eps    = lapackf77_dlamch("P");
    smlnum = lapackf77_dlamch("S");
    bignum = 1. / smlnum;
    lapackf77_dlabad(&smlnum, &bignum);
    smlnum = magma_dsqrt(smlnum) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_zlange("M", &n, &n, &a[a_offset], &lda, dum);
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_zlascl("G", &c__0, &c__0, &anrm, &cscale, &n, &n, &a[a_offset], &lda, &
                ierr);
    }

    /* Balance the matrix   
       (CWorkspace: none)   
       (RWorkspace: need N) */
    ibal = 1;
    lapackf77_zgebal("B", &n, &a[a_offset], &lda, &ilo, &ihi, &rwork[ibal], &ierr);

    /* Reduce to upper Hessenberg form   
       (CWorkspace: need 2*N, prefer N+N*NB)   
       (RWorkspace: none) */
    itau = 1;
    iwrk = itau + n;
    i__1 = lwork - iwrk + 1;

    //start = get_current_time();
#if defined(VERSION1)
    /*
     * Version 1 - LAPACK
     */
    lapackf77_zgehrd(&n, &ilo, &ihi, &a[a_offset], &lda,
                     &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION2)
    /*
     *  Version 2 - LAPACK consistent HRD
     */
    magma_zgehrd2(n, ilo, ihi, &a[a_offset], lda,
                  &work[itau], &work[iwrk], &i__1, &ierr);    
#elif defined(VERSION3)
    /*  
     * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored, 
     */
    magma_zgehrd(n, ilo, ihi, &a[a_offset], lda,
                 &work[itau], &work[iwrk], i__1, dT, 0, &ierr, queue);
#endif
    //end = get_current_time();
    //printf("    Time for zgehrd = %5.2f sec\n", GetTimerValue(start,end)/1000.);

    if (wantvl) {
      /*        Want left eigenvectors   
                Copy Householder vectors to VL */
        side[0] = 'L';
        lapackf77_zlacpy(MagmaLowerStr, &n, &n, 
                         &a[a_offset], &lda, &vl[vl_offset], &ldvl);

        /* Generate unitary matrix in VL   
           (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)   
           (RWorkspace: none) */
        i__1 = lwork - iwrk + 1;

        //start = get_current_time();
#if defined(VERSION1) || defined(VERSION2)
        /*
         * Version 1 & 2 - LAPACK
         */
        lapackf77_zunghr(&n, &ilo, &ihi, &vl[vl_offset], &ldvl, 
                         &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION3)
        /*
         * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
         */
        magma_zunghr(n, ilo, ihi, &vl[vl_offset], ldvl, &work[itau], 
                     dT, 0, nb, &ierr, queue);
#endif
        //end = get_current_time();
        //printf("    Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.);

        /* Perform QR iteration, accumulating Schur vectors in VL   
           (CWorkspace: need 1, prefer HSWORK (see comments) )   
           (RWorkspace: none) */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array,
                &vl[vl_offset], &ldvl, &work[iwrk], &i__1, info);

        if (wantvr) 
          {
            /* Want left and right eigenvectors   
               Copy Schur vectors to VR */
            side[0] = 'B';
            lapackf77_zlacpy("F", &n, &n, &vl[vl_offset], &ldvl, &vr[vr_offset], &ldvr);
          }

    } else if (wantvr) {
        /*  Want right eigenvectors   
            Copy Householder vectors to VR */
        side[0] = 'R';
        lapackf77_zlacpy("L", &n, &n, &a[a_offset], &lda, &vr[vr_offset], &ldvr);

        /* Generate unitary matrix in VR   
           (CWorkspace: need 2*N-1, prefer N+(N-1)*NB)   
           (RWorkspace: none) */
        i__1 = lwork - iwrk + 1;
        //start = get_current_time();
#if defined(VERSION1) || defined(VERSION2)
        /*
         * Version 1 & 2 - LAPACK
         */
        lapackf77_zunghr(&n, &ilo, &ihi, &vr[vr_offset], &ldvr, 
                         &work[itau], &work[iwrk], &i__1, &ierr);
#elif defined(VERSION3)
        /*
         * Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
         */
        magma_zunghr(n, ilo, ihi, &vr[vr_offset], ldvr, 
                     &work[itau], dT, 0, nb, &ierr, queue);
#endif
        //end = get_current_time();
        //printf("    Time for zunghr = %5.2f sec\n", GetTimerValue(start,end)/1000.);

        /* Perform QR iteration, accumulating Schur vectors in VR   
           (CWorkspace: need 1, prefer HSWORK (see comments) )   
           (RWorkspace: none) */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_zhseqr("S", "V", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array, 
                &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info);
    } else {
      /*  Compute eigenvalues only   
          (CWorkspace: need 1, prefer HSWORK (see comments) )   
          (RWorkspace: none) */
        iwrk = itau;
        i__1 = lwork - iwrk + 1;
        lapackf77_zhseqr("E", "N", &n, &ilo, &ihi, &a[a_offset], &lda, geev_w_array,
                &vr[vr_offset], &ldvr, &work[iwrk], &i__1, info);
    }

    /* If INFO > 0 from ZHSEQR, then quit */
    if (*info > 0) {
        goto L50;
    }

    if (wantvl || wantvr) {
        /*  Compute left and/or right eigenvectors   
            (CWorkspace: need 2*N)   
            (RWorkspace: need 2*N) */
        irwork = ibal + n;
        lapackf77_ztrevc(side, "B", select, &n, &a[a_offset], &lda, &vl[vl_offset], &ldvl,
                &vr[vr_offset], &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], 
                &ierr);
    }

    if (wantvl) {
        /*  Undo balancing of left eigenvectors   
            (CWorkspace: none)   
            (RWorkspace: need N) */
        lapackf77_zgebak("B", "L", &n, &ilo, &ihi, &rwork[ibal], &n, 
                         &vl[vl_offset], &ldvl, &ierr);

        /* Normalize left eigenvectors and make largest component real */
        for (i__ = 1; i__ <= n; ++i__) {
            scl = 1. / cblas_dznrm2(n, &vl[i__ * vl_dim1 + 1], 1);
            cblas_zdscal(n, scl, &vl[i__ * vl_dim1 + 1], 1);
            i__2 = n;
            for (k = 1; k <= i__2; ++k) 
            {
                i__3 = k + i__ * vl_dim1;
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL(vl[i__3]);
                /* Computing 2nd power */
                d__2 = MAGMA_Z_IMAG(vl[k + i__ * vl_dim1]);
                rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2;
            }
            /* Comment:
                   Fortran BLAS does not have to add 1
                   C       BLAS must add one to cblas_idamax */
            k = cblas_idamax(n, &rwork[irwork], 1)+1;
            z__2 = MAGMA_Z_CNJG(vl[k + i__ * vl_dim1]);
            d__1 = magma_dsqrt(rwork[irwork + k - 1]);
            MAGMA_Z_DSCALE(z__1, z__2, d__1);
            tmp = z__1;
            cblas_zscal(n, CBLAS_SADDR(tmp), &vl[i__ * vl_dim1 + 1], 1);
            i__2 = k + i__ * vl_dim1;
            i__3 = k + i__ * vl_dim1;
            d__1 = MAGMA_Z_REAL(vl[i__3]);
            MAGMA_Z_SET2REAL(z__1, d__1);
            vl[i__2] = z__1;
        }
    }

    if (wantvr) {
      /*  Undo balancing of right eigenvectors   
          (CWorkspace: none)   
          (RWorkspace: need N) */
        lapackf77_zgebak("B", "R", &n, &ilo, &ihi, &rwork[ibal], &n, 
                         &vr[vr_offset], &ldvr, &ierr);

        /* Normalize right eigenvectors and make largest component real */
        for (i__ = 1; i__ <= n; ++i__) {
            scl = 1. / cblas_dznrm2(n, &vr[i__ * vr_dim1 + 1], 1);
            cblas_zdscal(n, scl, &vr[i__ * vr_dim1 + 1], 1);
            i__2 = n;
            for (k = 1; k <= i__2; ++k) {
                i__3 = k + i__ * vr_dim1;
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL(vr[i__3]);
                /* Computing 2nd power */
                d__2 = MAGMA_Z_IMAG(vr[k + i__ * vr_dim1]);
                rwork[irwork + k - 1] = d__1 * d__1 + d__2 * d__2;
            }
            /* Comment:
                   Fortran BLAS does not have to add 1
                   C       BLAS must add one to cblas_idamax */
            k = cblas_idamax(n, &rwork[irwork], 1)+1;
            z__2 = MAGMA_Z_CNJG(vr[k + i__ * vr_dim1]);
            d__1 = magma_dsqrt(rwork[irwork + k - 1]);
            MAGMA_Z_DSCALE(z__1, z__2, d__1);
            tmp = z__1;
            cblas_zscal(n, CBLAS_SADDR(tmp), &vr[i__ * vr_dim1 + 1], 1);
            i__2 = k + i__ * vr_dim1;
            i__3 = k + i__ * vr_dim1;
            d__1 = MAGMA_Z_REAL(vr[i__3]);
            MAGMA_Z_SET2REAL(z__1, d__1);
            vr[i__2] = z__1;
        }
    }

    /*  Undo scaling if necessary */
L50:
    if (scalea) {
        i__1 = n - *info;
        /* Computing MAX */
        i__3 = n - *info;
        i__2 = max(i__3,1);
        lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, 
                geev_w_array + *info, &i__2, &ierr);
        if (*info > 0) {
            i__1 = ilo - 1;
            lapackf77_zlascl("G", &c__0, &c__0, &cscale, &anrm, &i__1, &c__1, 
                    geev_w_array, &n, &ierr);
        }
    }

#if defined(VERSION3)
    magma_free( dT );
#endif
    return *info;
} /* magma_zgeev */
Esempio n. 7
0
double F77_dznrm2(const int *N, const void *X, const int *incX)
{
   return cblas_dznrm2(*N, X, *incX);
}
Esempio n. 8
0
extern "C" magma_int_t
magma_zgeev_m(
    char jobvl, char jobvr, magma_int_t n,
    magmaDoubleComplex *A, magma_int_t lda,
    magmaDoubleComplex *W,
    magmaDoubleComplex *vl, magma_int_t ldvl,
    magmaDoubleComplex *vr, magma_int_t ldvr,
    magmaDoubleComplex *work, magma_int_t lwork,
    double *rwork, magma_int_t *info )
{
/*  -- MAGMA (version 1.4.1) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       December 2013

    Purpose
    =======
    ZGEEV computes for an N-by-N complex nonsymmetric matrix A, the
    eigenvalues and, optionally, the left and/or right eigenvectors.

    The right eigenvector v(j) of A satisfies
                     A * v(j) = lambda(j) * v(j)
    where lambda(j) is its eigenvalue.
    The left eigenvector u(j) of A satisfies
                  u(j)**H * A = lambda(j) * u(j)**H
    where u(j)**H denotes the conjugate transpose of u(j).

    The computed eigenvectors are normalized to have Euclidean norm
    equal to 1 and largest component real.

    Arguments
    =========
    JOBVL   (input) CHARACTER*1
            = 'N': left eigenvectors of A are not computed;
            = 'V': left eigenvectors of are computed.

    JOBVR   (input) CHARACTER*1
            = 'N': right eigenvectors of A are not computed;
            = 'V': right eigenvectors of A are computed.

    N       (input) INTEGER
            The order of the matrix A. N >= 0.

    A       (input/output) COMPLEX*16 array, dimension (LDA,N)
            On entry, the N-by-N matrix A.
            On exit, A has been overwritten.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    W       (output) COMPLEX*16 array, dimension (N)
            W contains the computed eigenvalues.

    VL      (output) COMPLEX*16 array, dimension (LDVL,N)
            If JOBVL = 'V', the left eigenvectors u(j) are stored one
            after another in the columns of VL, in the same order
            as their eigenvalues.
            If JOBVL = 'N', VL is not referenced.
            u(j) = VL(:,j), the j-th column of VL.

    LDVL    (input) INTEGER
            The leading dimension of the array VL.  LDVL >= 1; if
            JOBVL = 'V', LDVL >= N.

    VR      (output) COMPLEX*16 array, dimension (LDVR,N)
            If JOBVR = 'V', the right eigenvectors v(j) are stored one
            after another in the columns of VR, in the same order
            as their eigenvalues.
            If JOBVR = 'N', VR is not referenced.
            v(j) = VR(:,j), the j-th column of VR.

    LDVR    (input) INTEGER
            The leading dimension of the array VR.  LDVR >= 1; if
            JOBVR = 'V', LDVR >= N.

    WORK    (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
            On exit, if INFO = 0, WORK(1) returns the optimal LWORK.

    LWORK   (input) INTEGER
            The dimension of the array WORK.  LWORK >= (1+nb)*N.

            If LWORK = -1, then a workspace query is assumed; the routine
            only calculates the optimal size of the WORK array, returns
            this value as the first entry of the WORK array, and no error
            message related to LWORK is issued by XERBLA.

    RWORK   (workspace) DOUBLE PRECISION array, dimension (2*N)

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value.
            > 0:  if INFO = i, the QR algorithm failed to compute all the
                  eigenvalues, and no eigenvectors have been computed;
                  elements and i+1:N of W contain eigenvalues which have
                  converged.
    =====================================================================    */

    #define vl(i,j)  (vl + (i) + (j)*ldvl)
    #define vr(i,j)  (vr + (i) + (j)*ldvr)
    
    magma_int_t c_one  = 1;
    magma_int_t c_zero = 0;
    
    double d__1, d__2;
    magmaDoubleComplex z__1, z__2;
    magmaDoubleComplex tmp;
    double scl;
    double dum[1], eps;
    double anrm, cscale, bignum, smlnum;
    magma_int_t i, k, ilo, ihi;
    magma_int_t ibal, ierr, itau, iwrk, nout, liwrk, i__1, i__2, nb;
    magma_int_t scalea, minwrk, irwork, lquery, wantvl, wantvr, select[1];

    char side[2]   = {0, 0};
    char jobvl_[2] = {jobvl, 0};
    char jobvr_[2] = {jobvr, 0};

    irwork = 0;
    *info = 0;
    lquery = lwork == -1;
    wantvl = lapackf77_lsame( jobvl_, "V" );
    wantvr = lapackf77_lsame( jobvr_, "V" );
    if (! wantvl && ! lapackf77_lsame( jobvl_, "N" )) {
        *info = -1;
    } else if (! wantvr && ! lapackf77_lsame( jobvr_, "N" )) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    } else if ( (ldvl < 1) || (wantvl && (ldvl < n))) {
        *info = -8;
    } else if ( (ldvr < 1) || (wantvr && (ldvr < n))) {
        *info = -10;
    }

    /* Compute workspace */
    nb = magma_get_zgehrd_nb( n );
    if (*info == 0) {
        minwrk = (1+nb)*n;
        work[0] = MAGMA_Z_MAKE( minwrk, 0 );

        if (lwork < minwrk && ! lquery) {
            *info = -12;
        }
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery) {
        return *info;
    }

    /* Quick return if possible */
    if (n == 0) {
        return *info;
    }
    
    #if defined(Version3) || defined(Version4) || defined(Version5)
    magmaDoubleComplex *dT;
    if (MAGMA_SUCCESS != magma_zmalloc( &dT, nb*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    #endif
    #if defined(Version4) || defined(Version5)
    magmaDoubleComplex *T;
    if (MAGMA_SUCCESS != magma_zmalloc_cpu( &T, nb*n )) {
        magma_free( dT );
        *info = MAGMA_ERR_HOST_ALLOC;
        return *info;
    }
    #endif

    /* Get machine constants */
    eps    = lapackf77_dlamch( "P" );
    smlnum = lapackf77_dlamch( "S" );
    bignum = 1. / smlnum;
    lapackf77_dlabad( &smlnum, &bignum );
    smlnum = magma_dsqrt( smlnum ) / eps;
    bignum = 1. / smlnum;

    /* Scale A if max element outside range [SMLNUM,BIGNUM] */
    anrm = lapackf77_zlange( "M", &n, &n, A, &lda, dum );
    scalea = 0;
    if (anrm > 0. && anrm < smlnum) {
        scalea = 1;
        cscale = smlnum;
    } else if (anrm > bignum) {
        scalea = 1;
        cscale = bignum;
    }
    if (scalea) {
        lapackf77_zlascl( "G", &c_zero, &c_zero, &anrm, &cscale, &n, &n, A, &lda, &ierr );
    }

    /* Balance the matrix
     * (CWorkspace: none)
     * (RWorkspace: need N) */
    ibal = 0;
    lapackf77_zgebal( "B", &n, A, &lda, &ilo, &ihi, &rwork[ibal], &ierr );

    /* Reduce to upper Hessenberg form
     * (CWorkspace: need 2*N, prefer N + N*NB)
     * (RWorkspace: none) */
    itau = 0;
    iwrk = itau + n;
    liwrk = lwork - iwrk;

    #if defined(Version1)
        // Version 1 - LAPACK
        lapackf77_zgehrd( &n, &ilo, &ihi, A, &lda,
                          &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version2)
        // Version 2 - LAPACK consistent HRD
        magma_zgehrd2( n, ilo, ihi, A, lda,
                       &work[itau], &work[iwrk], &liwrk, &ierr );
    #elif defined(Version3)
        // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored,
        magma_zgehrd( n, ilo, ihi, A, lda,
                      &work[itau], &work[iwrk], liwrk, dT, &ierr );
    #elif defined(Version4) || defined(Version5)
        // Version 4 - Multi-GPU, T on host
        magma_zgehrd_m( n, ilo, ihi, A, lda,
                        &work[itau], &work[iwrk], liwrk, T, &ierr );
        magma_zsetmatrix( nb, n, T, nb, dT, nb );
    #endif

    if (wantvl) {
        /* Want left eigenvectors
         * Copy Householder vectors to VL */
        side[0] = 'L';
        lapackf77_zlacpy( MagmaLowerStr, &n, &n, A, &lda, vl, &ldvl );

        /* Generate unitary matrix in VL
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: none) */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_zunghr( &n, &ilo, &ihi, vl, &ldvl, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
            magma_zunghr( n, ilo, ihi, vl, ldvl, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_zunghr_m( n, ilo, ihi, vl, ldvl, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VL
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: none) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
                          vl, &ldvl, &work[iwrk], &liwrk, info );

        if (wantvr) {
            /* Want left and right eigenvectors
             * Copy Schur vectors to VR */
            side[0] = 'B';
            lapackf77_zlacpy( "F", &n, &n, vl, &ldvl, vr, &ldvr );
        }
    }
    else if (wantvr) {
        /* Want right eigenvectors
         * Copy Householder vectors to VR */
        side[0] = 'R';
        lapackf77_zlacpy( "L", &n, &n, A, &lda, vr, &ldvr );

        /* Generate unitary matrix in VR
         * (CWorkspace: need 2*N-1, prefer N + (N-1)*NB)
         * (RWorkspace: none) */
        #if defined(Version1) || defined(Version2)
            // Version 1 & 2 - LAPACK
            lapackf77_zunghr( &n, &ilo, &ihi, vr, &ldvr, &work[itau],
                              &work[iwrk], &liwrk, &ierr );
        #elif defined(Version3) || defined(Version4)
            // Version 3 - LAPACK consistent MAGMA HRD + matrices T stored
            magma_zunghr( n, ilo, ihi, vr, ldvr, &work[itau], dT, nb, &ierr );
        #elif defined(Version5)
            // Version 5 - Multi-GPU, T on host
            magma_zunghr_m( n, ilo, ihi, vr, ldvr, &work[itau], T, nb, &ierr );
        #endif

        /* Perform QR iteration, accumulating Schur vectors in VR
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: none) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "S", "V", &n, &ilo, &ihi, A, &lda, W,
                          vr, &ldvr, &work[iwrk], &liwrk, info );
    }
    else {
        /* Compute eigenvalues only
         * (CWorkspace: need 1, prefer HSWORK (see comments) )
         * (RWorkspace: none) */
        iwrk = itau;
        liwrk = lwork - iwrk;
        lapackf77_zhseqr( "E", "N", &n, &ilo, &ihi, A, &lda, W,
                          vr, &ldvr, &work[iwrk], &liwrk, info );
    }

    /* If INFO > 0 from ZHSEQR, then quit */
    if (*info > 0) {
        goto CLEANUP;
    }

    if (wantvl || wantvr) {
        /* Compute left and/or right eigenvectors
         * (CWorkspace: need 2*N)
         * (RWorkspace: need 2*N) */
        irwork = ibal + n;
        lapackf77_ztrevc( side, "B", select, &n, A, &lda, vl, &ldvl,
                          vr, &ldvr, &n, &nout, &work[iwrk], &rwork[irwork], &ierr );
    }

    if (wantvl) {
        /* Undo balancing of left eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_zgebak( "B", "L", &n, &ilo, &ihi, &rwork[ibal], &n,
                          vl, &ldvl, &ierr );

        /* Normalize left eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / cblas_dznrm2( n, vl(0,i), 1 );
            cblas_zdscal( n, scl, vl(0,i), 1 );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL( *vl(k,i) );
                d__2 = MAGMA_Z_IMAG( *vl(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = cblas_idamax( n, &rwork[irwork], 1 );
            z__2 = MAGMA_Z_CNJG( *vl(k,i) );
            d__1 = magma_dsqrt( rwork[irwork + k] );
            MAGMA_Z_DSCALE( z__1, z__2, d__1 );
            tmp = z__1;
            cblas_zscal( n, CBLAS_SADDR(tmp), vl(0,i), 1 );
            d__1 = MAGMA_Z_REAL( *vl(k,i) );
            z__1 = MAGMA_Z_MAKE( d__1, 0 );
            *vl(k,i) = z__1;
        }
    }

    if (wantvr) {
        /* Undo balancing of right eigenvectors
         * (CWorkspace: none)
         * (RWorkspace: need N) */
        lapackf77_zgebak( "B", "R", &n, &ilo, &ihi, &rwork[ibal], &n,
                          vr, &ldvr, &ierr );

        /* Normalize right eigenvectors and make largest component real */
        for (i = 0; i < n; ++i) {
            scl = 1. / cblas_dznrm2( n, vr(0,i), 1 );
            cblas_zdscal( n, scl, vr(0,i), 1 );
            for (k = 0; k < n; ++k) {
                /* Computing 2nd power */
                d__1 = MAGMA_Z_REAL( *vr(k,i) );
                d__2 = MAGMA_Z_IMAG( *vr(k,i) );
                rwork[irwork + k] = d__1*d__1 + d__2*d__2;
            }
            k = cblas_idamax( n, &rwork[irwork], 1 );
            z__2 = MAGMA_Z_CNJG( *vr(k,i) );
            d__1 = magma_dsqrt( rwork[irwork + k] );
            MAGMA_Z_DSCALE( z__1, z__2, d__1 );
            tmp = z__1;
            cblas_zscal( n, CBLAS_SADDR(tmp), vr(0,i), 1 );
            d__1 = MAGMA_Z_REAL( *vr(k,i) );
            z__1 = MAGMA_Z_MAKE( d__1, 0 );
            *vr(k,i) = z__1;
        }
    }

CLEANUP:
    /* Undo scaling if necessary */
    if (scalea) {
        i__1 = n - (*info);
        i__2 = max( n - (*info), 1 );
        lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                          W + (*info), &i__2, &ierr );
        if (*info > 0) {
            i__1 = ilo - 1;
            lapackf77_zlascl( "G", &c_zero, &c_zero, &cscale, &anrm, &i__1, &c_one,
                              W, &n, &ierr );
        }
    }

    #if defined(Version3) || defined(Version4) || defined(Version5)
    magma_free( dT );
    #endif
    #if defined(Version4) || defined(Version5)
    magma_free_cpu( T );
    #endif
    
    return *info;
} /* magma_zgeev */
Esempio n. 9
0
/*!
 * Power method to compute the norm of the operator A.
 * 
 * \retval bound upper bound on norm of the continuos 
 * Fourier transform operator (double).
 * \param[in] A Pointer to the measurement operator.
 * \param[in] A_data Data structure associated to A.
 * \param[in] At Pointer to the the adjoint of the measurement operator.
 * \param[in] At_data Data structure associated to At.
 *
 * \authors Rafael Carrillo
 */
double purify_measurement_pow_meth(void (*A)(void *out, void *in, void **data), 
                                   void **A_data,
                                   void (*At)(void *out, void *in, void **data), 
                                   void **At_data) {

  int i, iter, nx, ny;
  int seedn = 51;
  double bound, norm, rel_ob;
  purify_measurement_cparam *param;
  complex double *y;
  complex double *x;

  
  //Cast input pointers
  param = (purify_measurement_cparam*)A_data[0];
  nx = param->nx1*param->ny1;
  ny = param->nmeas;
  iter = 0;

  y = (complex double*)malloc((ny) * sizeof( complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y);
  x = (complex double*)malloc((nx) * sizeof( complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(x);

  if (param->nmeas > nx){
    for (i=0; i < nx; i++) {
        x[i] = purify_ran_gasdev2(seedn) + purify_ran_gasdev2(seedn)*I;
    }
    norm = cblas_dznrm2(nx, (void*)x, 1);
    for (i=0; i < nx; i++) {
        x[i] = x[i]/norm;
    }
    norm = 1.0;

    //main loop
    while (iter < 200){
      A((void*)y, (void*)x, A_data);
      At((void*)x, (void*)y, At_data);
      bound = cblas_dznrm2(nx, (void*)x, 1);
      rel_ob = (bound - norm)/norm;
      if (rel_ob <= 0.001)
        break;
      norm = bound;
      for (i=0; i < nx; i++) {
          x[i] = x[i]/norm;
      }
      iter++;
    }

  }
  else{
    for (i=0; i < ny; i++) {
        y[i] = purify_ran_gasdev2(seedn) + purify_ran_gasdev2(seedn)*I;
    }
    norm = cblas_dznrm2(ny, (void*)y, 1);
    for (i=0; i < ny; i++) {
        y[i] = y[i]/norm;
    }
    norm = 1.0;

    //main loop
    while (iter < 200){
      At((void*)x, (void*)y, At_data);
      A((void*)y, (void*)x, A_data);
      bound = cblas_dznrm2(ny, (void*)y, 1);
      rel_ob = (bound - norm)/norm;
      if (rel_ob <= 0.001)
        break;
      norm = bound;
      for (i=0; i < ny; i++) {
          y[i] = y[i]/norm;
      }
      iter++;
    }

  }

  free(y);
  free(x);

  return bound;

}
Esempio n. 10
0
std::complex<double> HostVector<std::complex<double> >::Norm(void) const {

  return cblas_dznrm2(this->size_, this->vec_, 1);

}
//
// Overloaded function for dispatching to
// * CBLAS backend, and
// * complex<double> value-type.
//
inline double nrm2( const int n, const std::complex<double>* x,
        const int incx ) {
    return cblas_dznrm2( n, x, incx );
}
Esempio n. 12
0
int main(int argc, char *argv[]) {

  

  int i, j, Nx, Ny, Nr, Nb;
  int seedn=54;
  double sigma;
  double a;
  double mse;
  double snr;
  double snr_out;
  double gamma=0.001;
  double aux1, aux2, aux3, aux4;
  complex double alpha;
  

  purify_image img, img_copy;
  purify_visibility_filetype filetype_vis;
  purify_image_filetype filetype_img;
  complex double *xinc;
  complex double *y0;
  complex double *y;
  complex double *noise;
  double *xout;
  double *w;
  double *error;
  complex double *xoutc;
  double *wdx;
  double *wdy;
  double *dummyr;
  complex double *dummyc;

  
  //parameters for the continuos Fourier Transform
  double *deconv;
  purify_sparsemat_row gmat;
  purify_visibility vis_test;
  purify_measurement_cparam param_m1;
  purify_measurement_cparam param_m2;
  complex double *fft_temp1;
  complex double *fft_temp2;
  void *datafwd[5];
  void *dataadj[5];
  fftw_plan planfwd;
  fftw_plan planadj;

  //Structures for sparsity operator
  sopt_wavelet_type *dict_types;
  sopt_wavelet_type *dict_types1;
  sopt_wavelet_type *dict_types2;
  sopt_sara_param param1;
  sopt_sara_param param2;
  sopt_sara_param param3;
  void *datas[1];
  void *datas1[1];
  void *datas2[1];

  //Structures for the opmization problems
  sopt_l1_sdmmparam param4;
  sopt_l1_rwparam param5;
  sopt_prox_tvparam param6;
  sopt_tv_sdmmparam param7;
  sopt_tv_rwparam param8;

  clock_t start, stop;
  double t = 0.0;
  double start1, stop1;
  int dimy, dimx;
  
  //Image dimension of the zero padded image
  //Dimensions should be power of 2
  dimx = 256;
  dimy = 256;

  //Define parameters
  filetype_vis = PURIFY_VISIBILITY_FILETYPE_PROFILE_VIS;
  filetype_img = PURIFY_IMAGE_FILETYPE_FITS;

  //Read coverage
  purify_visibility_readfile(&vis_test,
             "./data/images/Coverages/cont_sim4.vis",
             filetype_vis); 
  printf("Number of visibilities: %i \n\n", vis_test.nmeas);  

   

  // Input image.
  img.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img.nx = 4;
  img.ny = 4;

  //Read input image
  purify_image_readfile(&img, "data/images/Einstein.fits", 1);
  printf("Image dimension: %i, %i \n\n", img.nx, img.ny); 
//  purify_image_writefile(&img, "data/test/Einstein_double.fits", filetype_img);

  param_m1.nmeas = vis_test.nmeas;
  param_m1.ny1 = dimy;
  param_m1.nx1 = dimx;
  param_m1.ofy = 2;
  param_m1.ofx = 2;
  param_m1.ky = 2;
  param_m1.kx = 2;

  param_m2.nmeas = vis_test.nmeas;
  param_m2.ny1 = dimy;
  param_m2.nx1 = dimx;
  param_m2.ofy = 2;
  param_m2.ofx = 2;
  param_m2.ky = 2;
  param_m2.kx = 2;

  Nb = 9;
  Nx=param_m2.ny1*param_m2.nx1;
  Nr=Nb*Nx;
  Ny=param_m2.nmeas;

  //Memory allocation for the different variables
  deconv = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(deconv);
  xinc = (complex double*)malloc((Nx) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xinc);
  xout = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xout);
  y = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y);
  y0 = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(y0);
  noise = (complex double*)malloc((vis_test.nmeas) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(noise);
  w = (double*)malloc((Nr) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(w);
  error = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(error);
  xoutc = (complex double*)malloc((Nx) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(xoutc);

  wdx = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(wdx);
  wdy = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(wdy);

  dummyr = malloc(Nr * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(dummyr);
  dummyc = malloc(Nr * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(dummyc);

  for (i=0; i < Nx; i++){
    xinc[i] = 0.0 + 0.0*I;
  }

  for (i=0; i < img.nx; i++){
    for (j=0; j < img.ny; j++){
      xinc[i+j*param_m1.nx1] = img.pix[i+j*img.nx] + 0.0*I;
    }
  }
  
  //Initialize griding matrix
  assert((start = clock())!=-1);
  purify_measurement_init_cft(&gmat, deconv, vis_test.u, vis_test.v, &param_m1);
  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;
  printf("Time initalization: %f \n\n", t);

  for(i = 0; i < img.nx * img.ny; ++i){
    deconv[i] = 1.0;
  }
  
  //Memory allocation for the fft
  i = Nx*param_m1.ofy*param_m1.ofx;
  fft_temp1 = (complex double*)malloc((i) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp1);
  fft_temp2 = (complex double*)malloc((i) * sizeof(complex double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(fft_temp2);

  //FFT plan  
  planfwd = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, 
              fft_temp1, fft_temp1, 
              FFTW_FORWARD, FFTW_MEASURE);

  planadj = fftw_plan_dft_2d(param_m1.nx1*param_m1.ofx, param_m1.ny1*param_m1.ofy, 
              fft_temp2, fft_temp2, 
              FFTW_BACKWARD, FFTW_MEASURE);


  datafwd[0] = (void*)&param_m1;
  datafwd[1] = (void*)deconv;
  datafwd[2] = (void*)&gmat;
  datafwd[3] = (void*)&planfwd;
  datafwd[4] = (void*)fft_temp1;

  dataadj[0] = (void*)&param_m2;
  dataadj[1] = (void*)deconv;
  dataadj[2] = (void*)&gmat;
  dataadj[3] = (void*)&planadj;
  dataadj[4] = (void*)fft_temp2;


  printf("FFT plan done \n\n");
  
  
  assert((start = clock())!=-1);
  purify_measurement_cftfwd((void*)y0, (void*)xinc, datafwd);
  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;
  printf("Time forward operator: %f \n\n", t);

  
  //Noise realization
  //Input snr
  snr = 30.0;
  a = cblas_dznrm2(Ny, (void*)y0, 1);
  sigma = a*pow(10.0,-(snr/20.0))/sqrt(Ny);

  FILE *fout = fopen("ein.uv", "w");
    
  for (i=0; i < Ny; i++) {
//      noise[i] = (sopt_ran_gasdev2(seedn) + sopt_ran_gasdev2(seedn)*I)*(sigma/sqrt(2));
      noise[i] = 0;
      y[i] = y0[i] + noise[i];

    fprintf(fout, "%14.5e%14.5e%14.5e%14.5e%14.5e%14.5e\n", 
            vis_test.u[i], vis_test.v[i], vis_test.w[i],
            creal(y[i]), cimag(y[i]), 1.0);
  }

  fclose(fout);

  //Rescaling the measurements

  aux4 = (double)Ny/(double)Nx;

  for (i=0; i < Ny; i++) {
    y[i] = y[i]/sqrt(aux4);
  }

  for (i=0; i < Nx; i++) {
    deconv[i] = deconv[i]/sqrt(aux4);
  }
  
  
  // Output image.
  img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img_copy.nx = param_m1.nx1;
  img_copy.ny = param_m1.ny1;

  for (i=0; i < Nx; i++){
    xoutc[i] = 0.0 + 0.0*I;
  }
  
  //Dirty image
  purify_measurement_cftadj((void*)xoutc, (void*)y, dataadj);

  for (i=0; i < Nx; i++) {
    xout[i] = creal(xoutc[i]);
  }

  aux1 = purify_utils_maxarray(xout, Nx);

  

   img_copy.pix = (double*)malloc((Nx) * sizeof(double));
  PURIFY_ERROR_MEM_ALLOC_CHECK(img_copy.pix);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "eindirty.fits", filetype_img);
  
  return 0;
  
  //SARA structure initialization

    param1.ndict = Nb;
    param1.real = 0;

    dict_types = malloc(param1.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types);


    dict_types[0] = SOPT_WAVELET_DB1;
    dict_types[1] = SOPT_WAVELET_DB2;
    dict_types[2] = SOPT_WAVELET_DB3;
    dict_types[3] = SOPT_WAVELET_DB4;
    dict_types[4] = SOPT_WAVELET_DB5;
    dict_types[5] = SOPT_WAVELET_DB6;
    dict_types[6] = SOPT_WAVELET_DB7;
    dict_types[7] = SOPT_WAVELET_DB8;
    dict_types[8] = SOPT_WAVELET_Dirac;
    

    sopt_sara_initop(&param1, param_m1.ny1, param_m1.nx1, 4, dict_types);

    datas[0] = (void*)&param1;

    //Db8 structure initialization

    param2.ndict = 1;
    param2.real = 0;

    dict_types1 = malloc(param2.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types1);

    dict_types1[0] = SOPT_WAVELET_DB8;
    
    sopt_sara_initop(&param2, param_m1.ny1, param_m1.nx1, 4, dict_types1);

  datas1[0] = (void*)&param2;

  //Dirac structure initialization

    param3.ndict = 1;
    param3.real = 0;

    dict_types2 = malloc(param3.ndict * sizeof(sopt_wavelet_type));
    PURIFY_ERROR_MEM_ALLOC_CHECK(dict_types2);

    dict_types2[0] = SOPT_WAVELET_Dirac;
    
    sopt_sara_initop(&param3, param_m1.ny1, param_m1.nx1, 4, dict_types2);

  datas2[0] = (void*)&param3;

  //Scaling constants in the diferent representation domains

  sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas);

  for (i=0; i < Nr; i++) {
    dummyr[i] = creal(dummyc[i]);
  }

  aux2 = purify_utils_maxarray(dummyr, Nr);

  sopt_sara_analysisop((void*)dummyc, (void*)xoutc, datas1);

  for (i=0; i < Nr; i++) {
    dummyr[i] = creal(dummyc[i]);
  }

  aux3 = purify_utils_maxarray(dummyr, Nx);

  


  // Output image.
  img_copy.fov_x = 1.0 / 180.0 * PURIFY_PI;
  img_copy.fov_y = 1.0 / 180.0 * PURIFY_PI;
  img_copy.nx = param_m1.nx1;
  img_copy.ny = param_m1.ny1;



  //Initial solution and weights
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
      wdx[i] = 1.0;
      wdy[i] = 1.0;
  }
  for (i=0; i < Nr; i++){
    w[i] = 1.0;
  }
  //Copy true image in xout
  for (i=0; i < Nx; i++) {
    xout[i] = creal(xinc[i]);
  }



  printf("**********************\n");
  printf("BPSA reconstruction\n");
  printf("**********************\n");

  
    
  //Structure for the L1 solver      
  param4.verbose = 2;
  param4.max_iter = 300;
  param4.gamma = gamma*aux2;
  param4.rel_obj = 0.001;
  param4.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4);
  param4.epsilon_tol = 0.01;
  param4.real_data = 0;
  param4.cg_max_iter = 100;
  param4.cg_tol = 0.000001;

  
  //Initial solution
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
  }
 
  #ifdef _OPENMP 
    start1 = omp_get_wtime();
  #else
    assert((start = clock())!=-1);
  #endif
  sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas,
                   &sopt_sara_analysisop,
                   datas,
                   Nr,
                   (void*)y, Ny, w, param4);

  #ifdef _OPENMP 
    stop1 = omp_get_wtime();
    t = stop1 - start1;
  #else
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
  #endif

  printf("Time BPSA: %f \n\n", t); 

  
    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);


    
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "./data/test/einbpsa.fits", filetype_img);

  //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpsares.fits", filetype_img);
  
  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpsaerror.fits", filetype_img);
  


  printf("**********************\n");
  printf("SARA reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nr);
  param5.init_sol = 1;

  
  #ifdef _OPENMP 
    start1 = omp_get_wtime();
  #else
    assert((start = clock())!=-1);
  #endif
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas,
                   &sopt_sara_analysisop,
                   datas,
                   Nr,
                   (void*)y, Ny, param4, param5);

  #ifdef _OPENMP 
    stop1 = omp_get_wtime();
    t = stop1 - start1;
  #else
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
  #endif

  printf("Time SARA: %f \n\n", t); 

  
    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einsara.fits", filetype_img);

  

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einsarares.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einsaraerror.fits", filetype_img);

  
    printf("**********************\n");
    printf("TV reconstruction\n");
    printf("**********************\n");

    
    //Structure for the TV prox
    param6.verbose = 1;
    param6.max_iter = 50;
    param6.rel_obj = 0.0001;

    
    //Structure for the TV solver    
    param7.verbose = 2;
    param7.max_iter = 300;
    param7.gamma = gamma*aux1;
    param7.rel_obj = 0.001;
    param7.epsilon = sqrt(Ny + 2*sqrt(Ny))*sigma/sqrt(aux4);
    param7.epsilon_tol = 0.01;
    param7.real_data = 0;
    param7.cg_max_iter = 100;
    param7.cg_tol = 0.000001;
    param7.paramtv = param6;

    //Initial solution and weights
    for (i=0; i < Nx; i++) {
        xoutc[i] = 0.0 + 0.0*I;
        wdx[i] = 1.0;
        wdy[i] = 1.0;
    }

    
    assert((start = clock())!=-1);
    sopt_tv_sdmm((void*)xoutc, dimx, dimy,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   (void*)y, Ny, wdx, wdy, param7);
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;
    
    printf("Time TV: %f \n\n", t); 

    
   //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eintv.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eintvres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/eintverror.fits", filetype_img);

  printf("**********************\n");
  printf("RWTV reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWTV solver    
  param8.verbose = 2;
  param8.max_iter = 5;
  param8.rel_var = 0.001;
  param8.sigma = sigma*sqrt(Ny/(2*Nx));
  param8.init_sol = 1;

  
  assert((start = clock())!=-1);
    sopt_tv_rwsdmm((void*)xoutc, dimx, dimy,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   (void*)y, Ny, param7, param8);
    stop = clock();
    t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWTV: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtv.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtvres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwtverror.fits", filetype_img);

  printf("**********************\n");
  printf("Db8 reconstruction\n");
  printf("**********************\n");

  //Initial solution
  for (i=0; i < Nx; i++) {
        xoutc[i] = 0.0 + 0.0*I;
  }
  
  param4.gamma = gamma*aux3;

  assert((start = clock())!=-1);
   sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas1,
                   &sopt_sara_analysisop,
                   datas1,
                   Nx,
                   (void*)y, Ny, w, param4);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time BPDb8: %f \n\n", t); 

  //SNR
  for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8res.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/eindb8error.fits", filetype_img);

  printf("**********************\n");
  printf("RWBPDb8 reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nx);
  param5.init_sol = 1;

  
  assert((start = clock())!=-1);
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas1,
                   &sopt_sara_analysisop,
                   datas1,
                   Nx,
                   (void*)y, Ny, param4, param5);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWBPDb8: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8res.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwdb8error.fits", filetype_img);



  printf("**********************\n");
  printf("BP reconstruction\n");
  printf("**********************\n");

  param4.gamma = gamma*aux1;

  //Initial solution
  for (i=0; i < Nx; i++) {
      xoutc[i] = 0.0 + 0.0*I;
  }

  assert((start = clock())!=-1);
  sopt_l1_sdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas2,
                   &sopt_sara_analysisop,
                   datas2,
                   Nx,
                   (void*)y, Ny, w, param4); 

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time BP: %f \n\n", t); 

  //SNR
  for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbp.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einbpres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einbperror.fits", filetype_img);

  printf("**********************\n");
  printf("RWBP reconstruction\n");
  printf("**********************\n");

  

  //Structure for the RWL1 solver    
  param5.verbose = 2;
  param5.max_iter = 5;
  param5.rel_var = 0.001;
  param5.sigma = sigma*sqrt((double)Ny/(double)Nx);
  param5.init_sol = 1;

  
  assert((start = clock())!=-1);
  sopt_l1_rwsdmm((void*)xoutc, Nx,
                   &purify_measurement_cftfwd,
                   datafwd,
                   &purify_measurement_cftadj,
                   dataadj,
                   &sopt_sara_synthesisop,
                   datas2,
                   &sopt_sara_analysisop,
                   datas2,
                   Nx,
                   (void*)y, Ny, param4, param5);

  stop = clock();
  t = (double) (stop-start)/CLOCKS_PER_SEC;

  printf("Time RWBP: %f \n\n", t); 

  

    //SNR
    for (i=0; i < Nx; i++) {
        error[i] = creal(xoutc[i])-xout[i];
    }
    mse = cblas_dnrm2(Nx, error, 1);
    a = cblas_dnrm2(Nx, xout, 1);
    snr_out = 20.0*log10(a/mse);
    printf("SNR: %f dB\n\n", snr_out);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xoutc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbp.fits", filetype_img);

   //Residual image

  purify_measurement_cftfwd((void*)y0, (void*)xoutc, datafwd);
  alpha = -1.0 +0.0*I;
  cblas_zaxpy(Ny, (void*)&alpha, y, 1, y0, 1);
  purify_measurement_cftadj((void*)xinc, (void*)y0, dataadj);

  for (i=0; i < Nx; i++){
    img_copy.pix[i] = creal(xinc[i]);
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbpres.fits", filetype_img);

  //Error image
  for (i=0; i < Nx; i++){
    img_copy.pix[i] = error[i];
  }
  
  purify_image_writefile(&img_copy, "data/test/einrwbperror.fits", filetype_img);


  
  
  //Free all memory
  purify_image_free(&img);
  purify_image_free(&img_copy);
  free(deconv);
  purify_visibility_free(&vis_test);
  free(y);
  free(xinc);
  free(xout);
  free(w);
  free(noise);
  free(y0);
  free(error);
  free(xoutc);
  free(wdx);
  free(wdy);

  sopt_sara_free(&param1);
  sopt_sara_free(&param2);
  sopt_sara_free(&param3);
  free(dict_types);
  free(dict_types1);
  free(dict_types2);

  free(fft_temp1);
  free(fft_temp2);
  fftw_destroy_plan(planfwd);
  fftw_destroy_plan(planadj);
  purify_sparsemat_freer(&gmat);

  free(dummyr);
  free(dummyc);


  return 0;

}