예제 #1
0
void magmaf_dgetrf(
    magma_int_t *m, magma_int_t *n,
    double *A, magma_int_t *lda,
    magma_int_t *ipiv,
    magma_int_t *info )
{
    magma_dgetrf(
        *m, *n,
        A, *lda,
        ipiv,
        info );
}
예제 #2
0
파일: magFactors.c 프로젝트: sgsokol/magma
SEXP magLU(SEXP a)
{
   SEXP gpu = GET_SLOT(a, install("gpu")),
        b = PROTECT(NEW_OBJECT(MAKE_CLASS("magmaLU")));
   int *DIMA = INTEGER(GET_DIM(a)), M = DIMA[0], N = DIMA[1], LDA = M,
       MIN_MN = M < N ? M : N, *ipiv, info;
   double *A = REAL(PROTECT(AS_NUMERIC(a)));

   b = SET_SLOT(b, install(".Data"), AS_NUMERIC(a));
   SET_SLOT(b, install("pivot"), NEW_INTEGER(MIN_MN));
   ipiv = INTEGER(GET_SLOT(b, install("pivot")));
   SET_SLOT(b, install("gpu"), duplicate(gpu));

   if(LOGICAL_VALUE(gpu)) {
      double *dA;

      magma_malloc((void**)&dA, (M*N)*sizeof(double));

      magma_dsetmatrix(M, N, A, LDA, dA, LDA);
      magma_dgetrf_gpu(M, N, dA, LDA, ipiv, &info);
      magma_dgetmatrix(M, N, dA, LDA, REAL(b), LDA);

      magma_free(dA);
   } else {
      double *hA;

      magma_malloc_pinned((void**)&hA, (M*N)*sizeof(double));

      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, A, &LDA, hA, &LDA);
      magma_dgetrf(M, N, hA, LDA, ipiv, &info);
      lapackf77_dlacpy(MagmaUpperLowerStr, &M, &N, hA, &LDA, REAL(b), &LDA);

      magma_free_pinned(hA);
   }

   if(info < 0) error("illegal argument %d in 'magLU'", -1 * info);
   else if(info > 0) error("factor U is singular");

   UNPROTECT(2);

   return b;
}
예제 #3
0
SEXP magma_dgeMatrix_LU_(SEXP x, Rboolean warn_sing)
{
#ifdef HIPLAR_WITH_MAGMA
	SEXP val = get_factors(x, "LU");
	int *dims, npiv, info;

	if (val != R_NilValue) {
//		R_ShowMessage("already in slot");	/* nothing to do if it's there in 'factors' slot */
		return val;
	}

	dims = INTEGER(GET_SLOT(x, Matrix_DimSym));
	if (dims[0] < 1 || dims[1] < 1)
		error(_("Cannot factor a matrix with zero extents"));
	npiv = (dims[0] < dims[1]) ? dims[0] : dims[1];
	val = PROTECT(NEW_OBJECT(MAKE_CLASS("denseLU")));
	slot_dup(val, x, Matrix_xSym);
	slot_dup(val, x, Matrix_DimSym);
	double *h_R = REAL(GET_SLOT(val, Matrix_xSym));
	int *ipiv = INTEGER(ALLOC_SLOT(val, Matrix_permSym, INTSXP, npiv));
	
	if(GPUFlag == 0){
#ifdef HIPLAR_DBG
		R_ShowMessage("DBG: LU decomposition using dgetrf;");
#endif
		F77_CALL(dgetrf)(dims, dims + 1, h_R,
				dims,
				ipiv,
				&info);
	}
	else if(GPUFlag == 1 && Interface == 0){
#ifdef HIPLAR_DBG
		R_ShowMessage("DBG: LU decomposition using magma_dgetrf;");
#endif
			magma_dgetrf(dims[0], dims[1], h_R, dims[0], ipiv, &info);
		}
		else if(GPUFlag == 1 && Interface == 1) {
			
#ifdef HIPLAR_DBG
			R_ShowMessage("DBG: LU decomposition using magma_dgetrf_gpu;");	
#endif
			double *d_A;
			int N2 = dims[0] * dims[1];
			cublasStatus retStatus;

			cublasAlloc( N2 , sizeof(double), (void**)&d_A);
			
			/* Error Checking */
			retStatus = cublasGetError ();
			if (retStatus != CUBLAS_STATUS_SUCCESS) 
				error(_("CUBLAS: Error in Memory Allocation"));
			/********************************************/

			cublasSetVector(N2, sizeof(double), h_R, 1, d_A, 1);
			
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Date Transfer to Device"));
			/********************************************/

			magma_dgetrf_gpu(dims[0],dims[1], d_A, dims[0], ipiv,  &info);
			
			cublasGetVector( N2, sizeof(double), d_A, 1, h_R, 1);		
			
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error in Date Transfer from Device"));
			/********************************************/
			
				cublasFree(d_A);
		
			/* Error Checking */
			retStatus = cublasGetError ();
				if (retStatus != CUBLAS_STATUS_SUCCESS) 
					error(_("CUBLAS: Error freeing data"));
			/********************************************/
		}
		else
			error(_("MAGMA/LAPACK/Interface Flag not defined correctly"));
		
	if (info < 0)
		error(_("Lapack routine %s returned error code %d"), "dgetrf", info);
	else if (info > 0 && warn_sing)
		warning(_("Exact singularity detected during LU decomposition: %s, i=%d."),
				"U[i,i]=0", info);
	UNPROTECT(1);

	return set_factors(x, val, "LU");
#endif

	    return R_NilValue;
}
예제 #4
0
/* ////////////////////////////////////////////////////////////////////////////
   -- Testing dgetrf
*/
int main( int argc, char** argv)
{
    TESTING_INIT();

    real_Double_t   gflops, gpu_perf, gpu_time, cpu_perf=0, cpu_time=0;
    double          error;
    double *h_A;
    magma_int_t     *ipiv;
    magma_int_t     M, N, n2, lda, ldda, info, min_mn;
    magma_int_t     status = 0;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    double tol = opts.tolerance * lapackf77_dlamch("E");

    printf("ngpu %d\n", (int) opts.ngpu );
    if ( opts.check == 2 ) {
        printf("    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |Ax-b|/(N*|A|*|x|)\n");
    }
    else {
        printf("    M     N   CPU GFlop/s (sec)   GPU GFlop/s (sec)   |PA-LU|/(N*|A|)\n");
    }
    printf("=========================================================================\n");
    for( int itest = 0; itest < opts.ntest; ++itest ) {
        for( int iter = 0; iter < opts.niter; ++iter ) {
            M = opts.msize[itest];
            N = opts.nsize[itest];
            min_mn = min(M, N);
            lda    = M;
            n2     = lda*N;
            ldda   = ((M+31)/32)*32;
            gflops = FLOPS_DGETRF( M, N ) / 1e9;
            
            TESTING_MALLOC_CPU( ipiv, magma_int_t, min_mn );
            TESTING_MALLOC_PIN( h_A,  double, n2 );
            
            /* =====================================================================
               Performs operation using LAPACK
               =================================================================== */
            if ( opts.lapack ) {
                init_matrix( M, N, h_A, lda );
                
                cpu_time = magma_wtime();
                lapackf77_dgetrf(&M, &N, h_A, &lda, ipiv, &info);
                cpu_time = magma_wtime() - cpu_time;
                cpu_perf = gflops / cpu_time;
                if (info != 0)
                    printf("lapackf77_dgetrf returned error %d: %s.\n",
                           (int) info, magma_strerror( info ));
            }
            
            /* ====================================================================
               Performs operation using MAGMA
               =================================================================== */
            init_matrix( M, N, h_A, lda );
            
            gpu_time = magma_wtime();
            magma_dgetrf( M, N, h_A, lda, ipiv, &info);
            gpu_time = magma_wtime() - gpu_time;
            gpu_perf = gflops / gpu_time;
            if (info != 0)
                printf("magma_dgetrf returned error %d: %s.\n",
                       (int) info, magma_strerror( info ));
            
            /* =====================================================================
               Check the factorization
               =================================================================== */
            if ( opts.lapack ) {
                printf("%5d %5d   %7.2f (%7.2f)   %7.2f (%7.2f)",
                       (int) M, (int) N, cpu_perf, cpu_time, gpu_perf, gpu_time );
            }
            else {
                printf("%5d %5d     ---   (  ---  )   %7.2f (%7.2f)",
                       (int) M, (int) N, gpu_perf, gpu_time );
            }
            if ( opts.check == 2 ) {
                error = get_residual( M, N, h_A, lda, ipiv );
                printf("   %8.2e   %s\n", error, (error < tol ? "ok" : "failed"));
                status += ! (error < tol);
            }
            else if ( opts.check ) {
                error = get_LU_error( M, N, h_A, lda, ipiv );
                printf("   %8.2e   %s\n", error, (error < tol ? "ok" : "failed"));
                status += ! (error < tol);
            }
            else {
                printf("     ---   \n");
            }
            
            TESTING_FREE_CPU( ipiv );
            TESTING_FREE_PIN( h_A  );
            fflush( stdout );
        }
        if ( opts.niter > 1 ) {
            printf( "\n" );
        }
    }

    TESTING_FINALIZE();
    return status;
}
예제 #5
0
extern "C" magma_int_t
magma_dgesv(     magma_int_t n, magma_int_t nrhs, 
                 double *A, magma_int_t lda,
                 magma_int_t *ipiv, 
                 double *B, magma_int_t ldb, 
                 magma_int_t *info)
{
/*  -- MAGMA (version 1.3.0) --
       Univ. of Tennessee, Knoxville
       Univ. of California, Berkeley
       Univ. of Colorado, Denver
       November 2012

    Purpose
    =======

    Solves a system of linear equations
       A * X = B
    where A is a general N-by-N matrix and X and B are N-by-NRHS matrices.
    The LU decomposition with partial pivoting and row interchanges is
    used to factor A as
       A = P * L * U,
    where P is a permutation matrix, L is unit lower triangular, and U is
    upper triangular.  The factored form of A is then used to solve the
    system of equations A * X = B.

    Arguments
    =========

    N       (input) INTEGER
            The order of the matrix A.  N >= 0.

    NRHS    (input) INTEGER
            The number of right hand sides, i.e., the number of columns
            of the matrix B.  NRHS >= 0.

    A       (input/output) DOUBLE_PRECISION array, dimension (LDA,N).
            On entry, the M-by-N matrix to be factored.
            On exit, the factors L and U from the factorization
            A = P*L*U; the unit diagonal elements of L are not stored.

    LDA     (input) INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    IPIV    (output) INTEGER array, dimension (min(M,N))
            The pivot indices; for 1 <= i <= min(M,N), row i of the
            matrix was interchanged with row IPIV(i).

    B       (input/output) DOUBLE_PRECISION array, dimension (LDB,NRHS)
            On entry, the right hand side matrix B.
            On exit, the solution matrix X.

    LDB     (input) INTEGER
            The leading dimension of the array B.  LDB >= max(1,N).

    INFO    (output) INTEGER
            = 0:  successful exit
            < 0:  if INFO = -i, the i-th argument had an illegal value
    =====================================================================    */

    magma_int_t num_gpus, ldda, lddb;
    
    *info = 0;
    if (n < 0) {
        *info = -1;
    } else if (nrhs < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (ldb < max(1,n)) {
        *info = -7;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if (n == 0 || nrhs == 0) {
        return *info;
    }
    
    /* If single-GPU and allocation suceeds, use GPU interface. */
    num_gpus = magma_num_gpus();
    double *dA, *dB;
    if ( num_gpus > 1 ) {
        goto CPU_INTERFACE;
    }
    ldda = ((n+31)/32)*32;
    lddb = ldda;
    if ( MAGMA_SUCCESS != magma_dmalloc( &dA, ldda*n )) {
        goto CPU_INTERFACE;
    }
    if ( MAGMA_SUCCESS != magma_dmalloc( &dB, lddb*nrhs )) {
        magma_free( dA );
        dA = NULL;
        goto CPU_INTERFACE;
    }
    assert( num_gpus == 1 && dA != NULL && dB != NULL );
    magma_dsetmatrix( n, n, A, lda, dA, ldda );
    magma_dgetrf_gpu( n, n, dA, ldda, ipiv, info );
    magma_dgetmatrix( n, n, dA, ldda, A, lda );
    if ( *info == 0 ) {
        magma_dsetmatrix( n, nrhs, B, ldb, dB, lddb );
        magma_dgetrs_gpu( MagmaNoTrans, n, nrhs, dA, ldda, ipiv, dB, lddb, info );
        magma_dgetmatrix( n, nrhs, dB, lddb, B, ldb );
    }
    magma_free( dA );
    magma_free( dB );
    return *info;

CPU_INTERFACE:
    /* If multi-GPU or allocation failed, use CPU interface and LAPACK.
     * Faster to use LAPACK for getrs than to copy A to GPU. */
    magma_dgetrf( n, n, A, lda, ipiv, info );
    if ( *info == 0 ) {
        lapackf77_dgetrs( MagmaNoTransStr, &n, &nrhs, A, &lda, ipiv, B, &ldb, info );
    }
    return *info;
}