void magmaf_dgetrf( magma_int_t *m, magma_int_t *n, double *A, magma_int_t *lda, magma_int_t *ipiv, magma_int_t *info ) { magma_dgetrf( *m, *n, A, *lda, ipiv, info ); }
SEXP magLU(SEXP a) { SEXP gpu = GET_SLOT(a, install("gpu")), b = PROTECT(NEW_OBJECT(MAKE_CLASS("magmaLU"))); int *DIMA = INTEGER(GET_DIM(a)), M = DIMA[0], N = DIMA[1], LDA = M, MIN_MN = M < N ? M : N, *ipiv, info; double *A = REAL(PROTECT(AS_NUMERIC(a))); b = SET_SLOT(b, install(".Data"), AS_NUMERIC(a)); SET_SLOT(b, install("pivot"), NEW_INTEGER(MIN_MN)); ipiv = INTEGER(GET_SLOT(b, install("pivot"))); SET_SLOT(b, install("gpu"), duplicate(gpu)); if(LOGICAL_VALUE(gpu)) { double *dA; magma_malloc((void**)&dA, (M*N)*sizeof(double)); magma_dsetmatrix(M, N, A, LDA, dA, LDA); magma_dgetrf_gpu(M, N, dA, LDA, ipiv, &info); magma_dgetmatrix(M, N, dA, LDA, REAL(b), LDA); magma_free(dA); } else { double *hA; magma_malloc_pinned((void**)&hA, (M*N)*sizeof(double)); lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, A, &LDA, hA, &LDA); magma_dgetrf(M, N, hA, LDA, ipiv, &info); lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, hA, &LDA, REAL(b), &LDA); magma_free_pinned(hA); } if(info < 0) error("illegal argument %d in 'magLU'", -1 * info); else if(info > 0) error("factor U is singular"); UNPROTECT(2); return b; }
SEXP magma_dgeMatrix_LU_(SEXP x, Rboolean warn_sing) { #ifdef HIPLAR_WITH_MAGMA SEXP val = get_factors(x, "LU"); int *dims, npiv, info; if (val != R_NilValue) { // R_ShowMessage("already in slot"); /* nothing to do if it's there in 'factors' slot */ return val; } dims = INTEGER(GET_SLOT(x, Matrix_DimSym)); if (dims[0] < 1 || dims[1] < 1) error(_("Cannot factor a matrix with zero extents")); npiv = (dims[0] < dims[1]) ? dims[0] : dims[1]; val = PROTECT(NEW_OBJECT(MAKE_CLASS("denseLU"))); slot_dup(val, x, Matrix_xSym); slot_dup(val, x, Matrix_DimSym); double *h_R = REAL(GET_SLOT(val, Matrix_xSym)); int *ipiv = INTEGER(ALLOC_SLOT(val, Matrix_permSym, INTSXP, npiv)); if(GPUFlag == 0){ #ifdef HIPLAR_DBG R_ShowMessage("DBG: LU decomposition using dgetrf;"); #endif F77_CALL(dgetrf)(dims, dims + 1, h_R, dims, ipiv, &info); } else if(GPUFlag == 1 && Interface == 0){ #ifdef HIPLAR_DBG R_ShowMessage("DBG: LU decomposition using magma_dgetrf;"); #endif magma_dgetrf(dims[0], dims[1], h_R, dims[0], ipiv, &info); } else if(GPUFlag == 1 && Interface == 1) { #ifdef HIPLAR_DBG R_ShowMessage("DBG: LU decomposition using magma_dgetrf_gpu;"); #endif double *d_A; int N2 = dims[0] * dims[1]; cublasStatus retStatus; cublasAlloc( N2 , sizeof(double), (void**)&d_A); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Memory Allocation")); /********************************************/ cublasSetVector(N2, sizeof(double), h_R, 1, d_A, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Date Transfer to Device")); /********************************************/ magma_dgetrf_gpu(dims[0],dims[1], d_A, dims[0], ipiv, &info); cublasGetVector( N2, sizeof(double), d_A, 1, h_R, 1); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error in Date Transfer from Device")); /********************************************/ cublasFree(d_A); /* Error Checking */ retStatus = cublasGetError (); if (retStatus != CUBLAS_STATUS_SUCCESS) error(_("CUBLAS: Error freeing data")); /********************************************/ } else error(_("MAGMA/LAPACK/Interface Flag not defined correctly")); if (info < 0) error(_("Lapack routine %s returned error code %d"), "dgetrf", info); else if (info > 0 && warn_sing) warning(_("Exact singularity detected during LU decomposition: %s, i=%d."), "U[i,i]=0", info); UNPROTECT(1); return set_factors(x, val, "LU"); #endif return R_NilValue; }
/* //////////////////////////////////////////////////////////////////////////// -- Testing dgetrf */ int main( int argc, char** argv) { TESTING_INIT(); real_Double_t gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0; double error; double *h_A; magma_int_t *ipiv; magma_int_t M, N, n2, lda, ldda, info, min_mn; magma_int_t status = 0; magma_opts opts; parse_opts( argc, argv, &opts ); double tol = opts.tolerance * lapackf77_dlamch("E"); printf("ngpu %d\n", (int) opts.ngpu ); if ( opts.check == 2 ) { printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |Ax-b|/(N*|A|*|x|)\n"); } else { printf(" M N CPU GFlop/s (sec) GPU GFlop/s (sec) |PA-LU|/(N*|A|)\n"); } printf("=========================================================================\n"); for( int itest = 0; itest < opts.ntest; ++itest ) { for( int iter = 0; iter < opts.niter; ++iter ) { M = opts.msize[itest]; N = opts.nsize[itest]; min_mn = min(M, N); lda = M; n2 = lda*N; ldda = ((M+31)/32)*32; gflops = FLOPS_DGETRF( M, N ) / 1e9; TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn ); TESTING_MALLOC_PIN( h_A, double, n2 ); /* ===================================================================== Performs operation using LAPACK =================================================================== */ if ( opts.lapack ) { init_matrix( M, N, h_A, lda ); cpu_time = magma_wtime(); lapackf77_dgetrf(&M, &N, h_A, &lda, ipiv, &info); cpu_time = magma_wtime() - cpu_time; cpu_perf = gflops / cpu_time; if (info != 0) printf("lapackf77_dgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); } /* ==================================================================== Performs operation using MAGMA =================================================================== */ init_matrix( M, N, h_A, lda ); gpu_time = magma_wtime(); magma_dgetrf( M, N, h_A, lda, ipiv, &info); gpu_time = magma_wtime() - gpu_time; gpu_perf = gflops / gpu_time; if (info != 0) printf("magma_dgetrf returned error %d: %s.\n", (int) info, magma_strerror( info )); /* ===================================================================== Check the factorization =================================================================== */ if ( opts.lapack ) { printf("%5d %5d %7.2f (%7.2f) %7.2f (%7.2f)", (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time ); } else { printf("%5d %5d --- ( --- ) %7.2f (%7.2f)", (int) M, (int) N, gpu_perf, gpu_time ); } if ( opts.check == 2 ) { error = get_residual( M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else if ( opts.check ) { error = get_LU_error( M, N, h_A, lda, ipiv ); printf(" %8.2e %s\n", error, (error < tol ? "ok" : "failed")); status += ! (error < tol); } else { printf(" --- \n"); } TESTING_FREE_CPU( ipiv ); TESTING_FREE_PIN( h_A ); fflush( stdout ); } if ( opts.niter > 1 ) { printf( "\n" ); } } TESTING_FINALIZE(); return status; }
extern "C" magma_int_t magma_dgesv( magma_int_t n, magma_int_t nrhs, double *A, magma_int_t lda, magma_int_t *ipiv, double *B, magma_int_t ldb, magma_int_t *info) { /* -- MAGMA (version 1.3.0) -- Univ. of Tennessee, Knoxville Univ. of California, Berkeley Univ. of Colorado, Denver November 2012 Purpose ======= Solves a system of linear equations A * X = B where A is a general N-by-N matrix and X and B are N-by-NRHS matrices. The LU decomposition with partial pivoting and row interchanges is used to factor A as A = P * L * U, where P is a permutation matrix, L is unit lower triangular, and U is upper triangular. The factored form of A is then used to solve the system of equations A * X = B. Arguments ========= N (input) INTEGER The order of the matrix A. N >= 0. NRHS (input) INTEGER The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. A (input/output) DOUBLE_PRECISION array, dimension (LDA,N). On entry, the M-by-N matrix to be factored. On exit, the factors L and U from the factorization A = P*L*U; the unit diagonal elements of L are not stored. LDA (input) INTEGER The leading dimension of the array A. LDA >= max(1,N). IPIV (output) INTEGER array, dimension (min(M,N)) The pivot indices; for 1 <= i <= min(M,N), row i of the matrix was interchanged with row IPIV(i). B (input/output) DOUBLE_PRECISION array, dimension (LDB,NRHS) On entry, the right hand side matrix B. On exit, the solution matrix X. LDB (input) INTEGER The leading dimension of the array B. LDB >= max(1,N). INFO (output) INTEGER = 0: successful exit < 0: if INFO = -i, the i-th argument had an illegal value ===================================================================== */ magma_int_t num_gpus, ldda, lddb; *info = 0; if (n < 0) { *info = -1; } else if (nrhs < 0) { *info = -2; } else if (lda < max(1,n)) { *info = -4; } else if (ldb < max(1,n)) { *info = -7; } if (*info != 0) { magma_xerbla( __func__, -(*info) ); return *info; } /* Quick return if possible */ if (n == 0 || nrhs == 0) { return *info; } /* If single-GPU and allocation suceeds, use GPU interface. */ num_gpus = magma_num_gpus(); double *dA, *dB; if ( num_gpus > 1 ) { goto CPU_INTERFACE; } ldda = ((n+31)/32)*32; lddb = ldda; if ( MAGMA_SUCCESS != magma_dmalloc( &dA, ldda*n )) { goto CPU_INTERFACE; } if ( MAGMA_SUCCESS != magma_dmalloc( &dB, lddb*nrhs )) { magma_free( dA ); dA = NULL; goto CPU_INTERFACE; } assert( num_gpus == 1 && dA != NULL && dB != NULL ); magma_dsetmatrix( n, n, A, lda, dA, ldda ); magma_dgetrf_gpu( n, n, dA, ldda, ipiv, info ); magma_dgetmatrix( n, n, dA, ldda, A, lda ); if ( *info == 0 ) { magma_dsetmatrix( n, nrhs, B, ldb, dB, lddb ); magma_dgetrs_gpu( MagmaNoTrans, n, nrhs, dA, ldda, ipiv, dB, lddb, info ); magma_dgetmatrix( n, nrhs, dB, lddb, B, ldb ); } magma_free( dA ); magma_free( dB ); return *info; CPU_INTERFACE: /* If multi-GPU or allocation failed, use CPU interface and LAPACK. * Faster to use LAPACK for getrs than to copy A to GPU. */ magma_dgetrf( n, n, A, lda, ipiv, info ); if ( *info == 0 ) { lapackf77_dgetrs( MagmaNoTransStr, &n, &nrhs, A, &lda, ipiv, B, &ldb, info ); } return *info; }