// ------------------------------------------------------------ // Solve dA * dX = dB, where dA and dX are stored in GPU device memory. // Internally, MAGMA uses a hybrid CPU + GPU algorithm. void gpu_interface( magma_int_t n, magma_int_t nrhs ) { magmaDoubleComplex *dA=NULL, *dX=NULL; magma_int_t *ipiv=NULL; magma_int_t ldda = magma_roundup( n, 32 ); // round up to multiple of 32 for best GPU performance magma_int_t lddx = ldda; magma_int_t info = 0; magma_queue_t queue=NULL; // magma_*malloc routines for GPU memory are type-safe, // but you can use cudaMalloc if you prefer. magma_zmalloc( &dA, ldda*n ); magma_zmalloc( &dX, lddx*nrhs ); magma_imalloc_cpu( &ipiv, n ); // ipiv always on CPU if ( dA == NULL || dX == NULL || ipiv == NULL ) { fprintf( stderr, "malloc failed\n" ); goto cleanup; } magma_int_t dev = 0; magma_queue_create( dev, &queue ); // Replace these with your code to initialize A and X zfill_matrix_gpu( n, n, dA, ldda, queue ); zfill_rhs_gpu( n, nrhs, dX, lddx, queue ); magma_zgesv_gpu( n, 1, dA, ldda, ipiv, dX, ldda, &info ); if ( info != 0 ) { fprintf( stderr, "magma_zgesv_gpu failed with info=%d\n", info ); } // TODO: use result in dX cleanup: magma_queue_destroy( queue ); magma_free( dA ); magma_free( dX ); magma_free_cpu( ipiv ); }
// ------------------------------------------------------------ // Replace with your code to initialize the dX rhs on the GPU device. void zfill_rhs_gpu( magma_int_t m, magma_int_t nrhs, magmaDoubleComplex *dX, magma_int_t lddx ) { zfill_matrix_gpu( m, nrhs, dX, lddx ); }