Exemple #1
0
// ------------------------------------------------------------
// Solve A * X = B, where A and X are stored in CPU host memory.
// Internally, MAGMA transfers data to the GPU device
// and uses a hybrid CPU + GPU algorithm.
void cpu_interface( magma_int_t n, magma_int_t nrhs )
{
    magmaDoubleComplex *A=NULL, *X=NULL;
    magma_int_t *ipiv=NULL;
    magma_int_t lda  = n;
    magma_int_t ldx  = lda;
    magma_int_t info = 0;
    
    // magma_*malloc_cpu routines for CPU memory are type-safe and align to memory boundaries,
    // but you can use malloc or new if you prefer.
    magma_zmalloc_cpu( &A, lda*n );
    magma_zmalloc_cpu( &X, ldx*nrhs );
    magma_imalloc_cpu( &ipiv, n );
    if ( A == NULL || X == NULL || ipiv == NULL ) {
        fprintf( stderr, "malloc failed\n" );
        goto cleanup;
    }
    
    // Replace these with your code to initialize A and X
    zfill_matrix( n, n, A, lda );
    zfill_rhs( n, nrhs, X, ldx );
    
    magma_zgesv( n, 1, A, lda, ipiv, X, lda, &info );
    if ( info != 0 ) {
        fprintf( stderr, "magma_zgesv failed with info=%d\n", info );
    }
    
    // TODO: use result in X
    
cleanup:
    magma_free_cpu( A );
    magma_free_cpu( X );
    magma_free_cpu( ipiv );
}
Exemple #2
0
void MAGMAF_ZGESV ( magma_int_t *n, magma_int_t *nrhs,
                    cuDoubleComplex *A, magma_int_t *lda, magma_int_t *ipiv,
                    cuDoubleComplex *B, magma_int_t *ldb, magma_int_t *info)
{
    magma_zgesv(  *n, *nrhs,
                  A, *lda, ipiv,
                  B, *ldb,
                  info);
}
Exemple #3
0
     BOOST_FORCEINLINE result_type operator()(A0& a0, A1& a1, A2& a2) const
     {
        result_type that;
        nt2_la_int  n  = std::min(nt2::height(a0),nt2::width(a0));
        nt2_la_int  lda = n;
        nt2_la_int  nhrs = nt2::width(a2);
        nt2_la_int  ldb = a2.leading_size();

        a1.resize(nt2::of_size(n,1));

        magma_zgesv(n,nhrs,(cuDoubleComplex*)a0.data(),lda,a1.data()
                   ,(cuDoubleComplex*)a2.data(),ldb,&that);

        return that;
     }
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing zgesv
*/
int main(int argc, char **argv)
{
    real_Double_t   gflops, cpu_perf, cpu_time, gpu_perf, gpu_time;
    double          error, Rnorm, Anorm, Xnorm, *work;
    magmaDoubleComplex c_one     = MAGMA_Z_ONE;
    magmaDoubleComplex c_neg_one = MAGMA_Z_NEG_ONE;
    magmaDoubleComplex *h_A, *h_LU, *h_B, *h_X;
    magma_int_t *ipiv;
    magma_int_t N, nrhs, lda, ldb, info, sizeA, sizeB;
    magma_int_t ione     = 1;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t status = 0;

    /* Initialize */
    magma_queue_t  queue[2];
    magma_device_t device[ MagmaMaxGPUs ];
    int num = 0;
    magma_err_t err;
    magma_init();
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");
    
    nrhs = opts.nrhs;
    
    err = magma_get_devices( device, MagmaMaxGPUs, &num );
    if ( err != 0 || num < 1 ) {
      fprintf( stderr, "magma_get_devices failed: %d\n", err );
      exit(-1);
    }

    // Create two queues on device opts.device
    err = magma_queue_create( device[ opts.device ], &queue[0] );
    if ( err != 0 ) {
      fprintf( stderr, "magma_queue_create failed: %d\n", err );
      exit(-1);
    }
    err = magma_queue_create( device[ opts.device ], &queue[1] );
    if ( err != 0 ) {
      fprintf( stderr, "magma_queue_create failed: %d\n", err );
      exit(-1);
    }

    printf("ngpu %d\n", (int) opts.ngpu );
    printf("    N  NRHS   CPU Gflop/s (sec)   GPU GFlop/s (sec)   ||B - AX|| / N*||A||*||X||\n");
    printf("================================================================================\n");
    for( int i = 0; i < opts.ntest; ++i ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            N = opts.nsize[i];
            lda    = N;
            ldb    = lda;
            gflops = ( FLOPS_ZGETRF( N, N ) + FLOPS_ZGETRS( N, nrhs ) ) / 1e9;
            
            TESTING_MALLOC_CPU( h_A,  magmaDoubleComplex, lda*N    );
            TESTING_MALLOC_CPU( h_LU, magmaDoubleComplex, lda*N    );
            TESTING_MALLOC_CPU( h_B,  magmaDoubleComplex, ldb*nrhs );
            TESTING_MALLOC_CPU( h_X,  magmaDoubleComplex, ldb*nrhs );
            TESTING_MALLOC_CPU( work, double,          N        );
            TESTING_MALLOC_CPU( ipiv, magma_int_t,     N        );
            
            /* Initialize the matrices */
            sizeA = lda*N;
            sizeB = ldb*nrhs;
            lapackf77_zlarnv( &ione, ISEED, &sizeA, h_A );
            lapackf77_zlarnv( &ione, ISEED, &sizeB, h_B );
            
            // copy A to LU and B to X; save A and B for residual
            lapackf77_zlacpy( "F", &N, &N,    h_A, &lda, h_LU, &lda );
            lapackf77_zlacpy( "F", &N, &nrhs, h_B, &ldb, h_X,  &ldb );
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            gpu_time = magma_wtime();
            magma_zgesv( N, nrhs, h_LU, lda, ipiv, h_X, ldb, &info, queue );
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_zgesv returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            //=====================================================================
            // Residual
            //=====================================================================
            Anorm = lapackf77_zlange("I", &N, &N,    h_A, &lda, work);
            Xnorm = lapackf77_zlange("I", &N, &nrhs, h_X, &ldb, work);
            
            blasf77_zgemm( MagmaNoTransStr, MagmaNoTransStr, &N, &nrhs, &N,
                           &c_one,     h_A, &lda,
                                       h_X, &ldb,
                           &c_neg_one, h_B, &ldb);
            
            Rnorm = lapackf77_zlange("I", &N, &nrhs, h_B, &ldb, work);
            error = Rnorm/(N*Anorm*Xnorm);
            status |= ! (error < tol);
            
            /* ====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                cpu_time = magma_wtime();
                lapackf77_zgesv( &N, &nrhs, h_A, &lda, ipiv, h_B, &ldb, &info );
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_zgesv returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
                
                printf( "%5d %5d   %7.2f (%7.2f)   %7.2f (%7.2f)   %8.2e%s\n",
                        (int) N, (int) nrhs, cpu_perf, cpu_time, gpu_perf, gpu_time,
                        error, (error < tol ? "" : "  failed"));
            }
            else {
                printf( "%5d %5d     ---   (  ---  )   %7.2f (%7.2f)   %8.2e%s\n",
                        (int) N, (int) nrhs, gpu_perf, gpu_time,
                        error, (error < tol ? "" : "  failed"));
            }
            
            TESTING_FREE_CPU( h_A  );
            TESTING_FREE_CPU( h_LU );
            TESTING_FREE_CPU( h_B  );
            TESTING_FREE_CPU( h_X  );
            TESTING_FREE_CPU( work );
            TESTING_FREE_CPU( ipiv );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    magma_queue_destroy( queue[0] );
    magma_queue_destroy( queue[1] );
    magma_finalize();

    return status;
}