Example #1
0
// ------------------------------------------------------------
// Solve dA * dX = dB, where dA and dX are stored in GPU device memory.
// Internally, MAGMA uses a hybrid CPU + GPU algorithm.
void gpu_interface( magma_int_t n, magma_int_t nrhs )
{
    magmaDoubleComplex *dA=NULL, *dX=NULL;
    magma_int_t *ipiv=NULL;
    magma_int_t ldda = magma_roundup( n, 32 );  // round up to multiple of 32 for best GPU performance
    magma_int_t lddx = ldda;
    magma_int_t info = 0;
    magma_queue_t queue=NULL;
    
    // magma_*malloc routines for GPU memory are type-safe,
    // but you can use cudaMalloc if you prefer.
    magma_zmalloc( &dA, ldda*n );
    magma_zmalloc( &dX, lddx*nrhs );
    magma_imalloc_cpu( &ipiv, n );  // ipiv always on CPU
    if ( dA == NULL || dX == NULL || ipiv == NULL ) {
        fprintf( stderr, "malloc failed\n" );
        goto cleanup;
    }
    
    magma_int_t dev = 0;
    magma_queue_create( dev, &queue );
    
    // Replace these with your code to initialize A and X
    zfill_matrix_gpu( n, n, dA, ldda, queue );
    zfill_rhs_gpu( n, nrhs, dX, lddx, queue );
    
    magma_zgesv_gpu( n, 1, dA, ldda, ipiv, dX, ldda, &info );
    if ( info != 0 ) {
        fprintf( stderr, "magma_zgesv_gpu failed with info=%d\n", info );
    }
    
    // TODO: use result in dX
    
cleanup:
    magma_queue_destroy( queue );
    magma_free( dA );
    magma_free( dX );
    magma_free_cpu( ipiv );
}
Example #2
0
// ------------------------------------------------------------
// Replace with your code to initialize the dX rhs on the GPU device.
void zfill_rhs_gpu(
    magma_int_t m, magma_int_t nrhs, magmaDoubleComplex *dX, magma_int_t lddx )
{
    zfill_matrix_gpu( m, nrhs, dX, lddx );
}