SEXP dgeMatrix_LU_(SEXP x, Rboolean warn_sing)
{
    SEXP val = get_factors(x, "LU");
    int *dims, npiv, info;

    if (val != R_NilValue) /* nothing to do if it's there in 'factors' slot */
        return val;
    dims = INTEGER(GET_SLOT(x, Matrix_DimSym));
    if (dims[0] < 1 || dims[1] < 1)
        error(_("Cannot factor a matrix with zero extents"));
    npiv = (dims[0] < dims[1]) ? dims[0] : dims[1];
    val = PROTECT(NEW_OBJECT(MAKE_CLASS("denseLU")));
    slot_dup(val, x, Matrix_xSym);
    slot_dup(val, x, Matrix_DimSym);
    F77_CALL(dgetrf)(dims, dims + 1, REAL(GET_SLOT(val, Matrix_xSym)), dims,
                     INTEGER(ALLOC_SLOT(val, Matrix_permSym, INTSXP, npiv)),
                     &info);
    if (info < 0)
        error(_("Lapack routine %s returned error code %d"), "dgetrf", info);
    else if (info > 0 && warn_sing)
        warning(_("Exact singularity detected during LU decomposition."));
    UNPROTECT(1);
    return set_factors(x, val, "LU");
}
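/* Illustrative sketch, not part of the package API: because dgeMatrix_LU_()
 * caches its result in the 'factors' slot via set_factors(), a second call on
 * the same object returns the stored "denseLU" without refactoring.  The
 * hypothetical helper below merely extracts the (1-based) pivot vector from
 * the cached factorization, assuming the same headers and macros as above. */
static SEXP dgeMatrix_LU_pivots_sketch(SEXP x)
{
    SEXP lu = dgeMatrix_LU_(x, TRUE);   /* computes or reuses the cached LU */
    return duplicate(GET_SLOT(lu, Matrix_permSym));
}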
SEXP dppMatrix_solve(SEXP x)
{
    SEXP Chol = dppMatrix_chol(x);
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dppMatrix")));
    int *dims = INTEGER(GET_SLOT(x, Matrix_DimSym)), info;

    slot_dup(val, Chol, Matrix_uploSym);
    slot_dup(val, Chol, Matrix_xSym);
    slot_dup(val, Chol, Matrix_DimSym);
    F77_CALL(dpptri)(uplo_P(val), dims,
                     REAL(GET_SLOT(val, Matrix_xSym)), &info);
    UNPROTECT(1);
    return val;
}
SEXP dppMatrix_chol(SEXP x)
{
    SEXP val = get_factors(x, "pCholesky"),
        dimP = GET_SLOT(x, Matrix_DimSym),
        uploP = GET_SLOT(x, Matrix_uploSym);
    const char *uplo = CHAR(STRING_ELT(uploP, 0));
    int *dims = INTEGER(dimP), info;

    if (val != R_NilValue) /* use the cached factorization */
        return val;
    val = PROTECT(NEW_OBJECT(MAKE_CLASS("pCholesky")));
    SET_SLOT(val, Matrix_uploSym, duplicate(uploP));
    SET_SLOT(val, Matrix_diagSym, mkString("N"));
    SET_SLOT(val, Matrix_DimSym, duplicate(dimP));
    slot_dup(val, x, Matrix_xSym);
    F77_CALL(dpptrf)(uplo, dims, REAL(GET_SLOT(val, Matrix_xSym)), &info);
    if (info) {
        if (info > 0) /* e.g. x singular */
            error(_("the leading minor of order %d is not positive definite"),
                  info);
        else /* should never happen! */
            error(_("Lapack routine %s returned error code %d"), "dpptrf", info);
    }
    UNPROTECT(1);
    return set_factors(x, val, "pCholesky");
}
SEXP dtrMatrix_chol2inv(SEXP a)
{
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix")));
    int info, n;

    slot_dup(val, a, Matrix_DimSym);
    slot_dup(val, a, Matrix_uploSym);
    slot_dup(val, a, Matrix_diagSym);
    slot_dup(val, a, Matrix_DimNamesSym);
    slot_dup(val, a, Matrix_xSym);
    n = *INTEGER(GET_SLOT(val, Matrix_DimSym));
    F77_CALL(dpotri)(uplo_P(val), &n,
                     REAL(GET_SLOT(val, Matrix_xSym)), &n, &info);
    UNPROTECT(1);
    return val;
}
SEXP dgeMatrix_solve(SEXP a)
{
    /* compute the 1-norm of the matrix, which is needed later for the
       computation of the reciprocal condition number. */
    double aNorm = get_norm(a, "1");

    /* the LU decomposition : */
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix"))),
        lu = dgeMatrix_LU_(a, TRUE);
    int *dims = INTEGER(GET_SLOT(lu, Matrix_DimSym)),
        *pivot = INTEGER(GET_SLOT(lu, Matrix_permSym));

    /* prepare variables for the dgetri calls */
    double *x, tmp;
    int info, lwork = -1;

    if (dims[0] != dims[1])
        error(_("Solve requires a square matrix"));
    slot_dup(val, lu, Matrix_xSym);
    x = REAL(GET_SLOT(val, Matrix_xSym));
    slot_dup(val, lu, Matrix_DimSym);

    if (dims[0]) { /* the dimension is not zero */
        /* is the matrix *computationally* singular ? */
        double rcond;
        F77_CALL(dgecon)("1", dims, x, dims, &aNorm, &rcond,
                         (double *) R_alloc(4*dims[0], sizeof(double)),
                         (int *) R_alloc(dims[0], sizeof(int)), &info);
        if (info)
            error(_("error [%d] from Lapack 'dgecon()'"), info);
        if (rcond < DOUBLE_EPS)
            error(_("Lapack dgecon(): system computationally singular, reciprocal condition number = %g"),
                  rcond);

        /* only now try the inversion and check if the matrix is *exactly* singular: */
        F77_CALL(dgetri)(dims, x, dims, pivot, &tmp, &lwork, &info); /* workspace query */
        lwork = (int) tmp;
        F77_CALL(dgetri)(dims, x, dims, pivot,
                         (double *) R_alloc((size_t) lwork, sizeof(double)),
                         &lwork, &info);
        if (info)
            error(_("Lapack routine dgetri: system is exactly singular"));
    }
    UNPROTECT(1);
    return val;
}
SEXP dsyMatrix_solve(SEXP a)
{
    SEXP trf = dsyMatrix_trf(a);
    SEXP val = PROTECT(NEW_OBJECT_OF_CLASS("dsyMatrix"));
    int *dims = INTEGER(GET_SLOT(trf, Matrix_DimSym)), info;

    slot_dup(val, trf, Matrix_uploSym);
    slot_dup(val, trf, Matrix_xSym);
    slot_dup(val, trf, Matrix_DimSym);
    F77_CALL(dsytri)(uplo_P(val), dims,
                     REAL(GET_SLOT(val, Matrix_xSym)), dims,
                     INTEGER(GET_SLOT(trf, Matrix_permSym)),
                     (double *) R_alloc((long) dims[0], sizeof(double)),
                     &info);
    UNPROTECT(1);
    return val;
}
// n.CMatrix --> [dli].CMatrix  (not going through CHM!)
SEXP nz2Csparse(SEXP x, enum x_slot_kind r_kind)
{
    const char *cl_x = class_P(x);
    if (cl_x[0] != 'n')
        error(_("not a 'n.CMatrix'"));
    if (cl_x[2] != 'C')
        error(_("not a CsparseMatrix"));
    int nnz = LENGTH(GET_SLOT(x, Matrix_iSym));
    SEXP ans;
    char *ncl = strdup(cl_x);
    double *dx_x;
    int *ix_x;

    ncl[0] = (r_kind == x_double  ? 'd' :
              (r_kind == x_logical ? 'l' :
               /* else (for now): r_kind == x_integer : */ 'i'));
    PROTECT(ans = NEW_OBJECT(MAKE_CLASS(ncl)));
    // create a correct 'x' slot:
    switch(r_kind) {
        int i;
    case x_double: // 'd'
        dx_x = REAL(ALLOC_SLOT(ans, Matrix_xSym, REALSXP, nnz));
        for (i = 0; i < nnz; i++) dx_x[i] = 1.;
        break;
    case x_logical: // 'l'
        ix_x = LOGICAL(ALLOC_SLOT(ans, Matrix_xSym, LGLSXP, nnz));
        for (i = 0; i < nnz; i++) ix_x[i] = TRUE;
        break;
    case x_integer: // 'i'
        ix_x = INTEGER(ALLOC_SLOT(ans, Matrix_xSym, INTSXP, nnz));
        for (i = 0; i < nnz; i++) ix_x[i] = 1;
        break;
    default:
        error(_("nz2Csparse(): invalid/non-implemented r_kind = %d"), r_kind);
    }
    // now copy all other slots :
    slot_dup(ans, x, Matrix_iSym);
    slot_dup(ans, x, Matrix_pSym);
    slot_dup(ans, x, Matrix_DimSym);
    slot_dup(ans, x, Matrix_DimNamesSym);
    if (ncl[1] != 'g') { // symmetric or triangular ...
        slot_dup_if_has(ans, x, Matrix_uploSym);
        slot_dup_if_has(ans, x, Matrix_diagSym);
    }
    free(ncl);
    UNPROTECT(1);
    return ans;
}
SEXP R_to_CMatrix(SEXP x)
{
    SEXP ans, tri = PROTECT(allocVector(LGLSXP, 1));
    char *ncl = strdup(class_P(x));
    static const char *valid[] = { MATRIX_VALID_Rsparse, ""};
    int ctype = R_check_class_etc(x, valid);
    int *x_dims = INTEGER(GET_SLOT(x, Matrix_DimSym)), *a_dims;
    PROTECT_INDEX ipx;

    if (ctype < 0)
        error(_("invalid class(x) '%s' in R_to_CMatrix(x)"), ncl);

    /* replace 'R' with 'C' : */
    ncl[2] = 'C';
    PROTECT_WITH_INDEX(ans = NEW_OBJECT(MAKE_CLASS(ncl)), &ipx);

    a_dims = INTEGER(ALLOC_SLOT(ans, Matrix_DimSym, INTSXP, 2));
    /* reversed dim() since we will transpose: */
    a_dims[0] = x_dims[1];
    a_dims[1] = x_dims[0];

    /* triangular: */ LOGICAL(tri)[0] = 0;
    if ((ctype / 3) != 2) /* not n..Matrix */
        slot_dup(ans, x, Matrix_xSym);
    if (ctype % 3) { /* s(ymmetric) or t(riangular) : */
        SET_SLOT(ans, Matrix_uploSym,
                 mkString((*uplo_P(x) == 'U') ? "L" : "U"));
        if (ctype % 3 == 2) { /* t(riangular) : */
            LOGICAL(tri)[0] = 1;
            slot_dup(ans, x, Matrix_diagSym);
        }
    }
    SET_SLOT(ans, Matrix_iSym, duplicate(GET_SLOT(x, Matrix_jSym)));
    slot_dup(ans, x, Matrix_pSym);
    REPROTECT(ans = Csparse_transpose(ans, tri), ipx);
    SET_DimNames(ans, x); // possibly asymmetric for symmetricMatrix is ok
    free(ncl);
    UNPROTECT(2);
    return ans;
}
/* This and the preceding R_to_CMatrix() lead to memory-not-mapped seg.faults
 * only with {32bit + R-devel + enable-R-shlib} -- no idea why */
SEXP compressed_to_TMatrix(SEXP x, SEXP colP)
{
    int col = asLogical(colP); /* 1 if "C"olumn compressed;  0 if "R"ow */
    /* however, for Csparse, we now effectively use the cholmod-based
     * Csparse_to_Tsparse() in ./Csparse.c ; maybe should simply write
     * an  as_cholmod_Rsparse() function and then do "as there" ...*/
    SEXP indSym = col ? Matrix_iSym : Matrix_jSym,
        ans, indP = GET_SLOT(x, indSym),
        pP = GET_SLOT(x, Matrix_pSym);
    int npt = length(pP) - 1;
    char *ncl = strdup(class_P(x));
    static const char *valid[] = { MATRIX_VALID_Csparse, MATRIX_VALID_Rsparse, ""};
    int ctype = R_check_class_etc(x, valid);

    if (ctype < 0)
        error(_("invalid class(x) '%s' in compressed_to_TMatrix(x)"), ncl);

    /* replace 'C' or 'R' with 'T' :*/
    ncl[2] = 'T';
    ans = PROTECT(NEW_OBJECT(MAKE_CLASS(ncl)));

    slot_dup(ans, x, Matrix_DimSym);
    if ((ctype / 3) % 4 != 2) /* not n..Matrix */
        slot_dup(ans, x, Matrix_xSym);
    if (ctype % 3) { /* s(ymmetric) or t(riangular) : */
        slot_dup(ans, x, Matrix_uploSym);
        if (ctype % 3 == 2) /* t(riangular) : */
            slot_dup(ans, x, Matrix_diagSym);
    }
    SET_DimNames(ans, x); // possibly asymmetric for symmetricMatrix is ok
    SET_SLOT(ans, indSym, duplicate(indP));
    expand_cmprPt(npt, INTEGER(pP),
                  INTEGER(ALLOC_SLOT(ans, col ? Matrix_jSym : Matrix_iSym,
                                     INTSXP, length(indP))));
    free(ncl);
    UNPROTECT(1);
    return ans;
}
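/* Illustrative sketch of what a column-pointer expansion such as
 * expand_cmprPt(n, p, result) does (the real helper is defined elsewhere in
 * the package; this hypothetical re-statement only documents the call above):
 * for each j in 0..n-1 it writes j into result[k] for p[j] <= k < p[j+1],
 * turning the compressed pointer vector into explicit 0-based indices. */
static void expand_cmprPt_sketch(int ncol, const int p[], int result[])
{
    for (int j = 0; j < ncol; j++)
        for (int k = p[j]; k < p[j + 1]; k++)
            result[k] = j;
}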
SEXP dtCMatrix_sparse_solve(SEXP a, SEXP b)
{
    SEXP ans = PROTECT(NEW_OBJECT(MAKE_CLASS("dgCMatrix")));
    CSP A = AS_CSP(a), B = AS_CSP(b);
    int *xp = INTEGER(ALLOC_SLOT(ans, Matrix_pSym, INTSXP, (B->n) + 1)),
        xnz = 10 * B->p[B->n];          /* initial estimate of nnz in x */
    int *ti = Calloc(xnz, int), k, lo = uplo_P(a)[0] == 'L', pos = 0;
    double *tx = Calloc(xnz, double);
    double *wrk = Alloca(A->n, double);
    int *xi = Alloca(2*A->n, int);      /* for cs_reach */
    R_CheckStack();

    if (A->m != A->n || B->n < 1 || A->n < 1 || A->n != B->m)
        error(_("Dimensions of system to be solved are inconsistent"));
    slot_dup(ans, b, Matrix_DimSym);
    SET_DimNames(ans, b);
    xp[0] = 0;
    for (k = 0; k < B->n; k++) {
        int top = cs_spsolve(A, B, k, xi, wrk, (int *)NULL, lo);
        int nz = A->n - top, p;

        xp[k + 1] = nz + xp[k];
        if (xp[k + 1] > xnz) {
            while (xp[k + 1] > xnz) xnz *= 2;
            ti = Realloc(ti, xnz, int);
            tx = Realloc(tx, xnz, double);
        }
        if (lo)                 /* increasing row order */
            for (p = top; p < A->n; p++, pos++) {
                ti[pos] = xi[p];
                tx[pos] = wrk[xi[p]];
            }
        else                    /* decreasing order, reverse copy */
            for (p = A->n - 1; p >= top; p--, pos++) {
                ti[pos] = xi[p];
                tx[pos] = wrk[xi[p]];
            }
    }
    xnz = xp[B->n];
    Memcpy(INTEGER(ALLOC_SLOT(ans, Matrix_iSym, INTSXP, xnz)), ti, xnz);
    Memcpy(   REAL(ALLOC_SLOT(ans, Matrix_xSym, REALSXP, xnz)), tx, xnz);

    Free(ti); Free(tx);
    UNPROTECT(1);
    return ans;
}
/**
 * Return a SuiteSparse QR factorization of the sparse matrix A
 *
 * @param Ap (pointer to) a [m x n] dgCMatrix
 * @param ordering integer SEXP specifying the ordering strategy to be used;
 *      see SPQR/Include/SuiteSparseQR_definitions.h
 * @param econ integer SEXP ("economy"): number of rows of R and columns of Q
 *      to return.  The default is m.  Using n gives the standard economy form.
 *      A value less than the estimated rank r is set to r, so econ=0 gives the
 *      "rank-sized" factorization, where nrow(R)==nnz(diag(R))==r.
 * @param tol double SEXP: if tol <= -2, use SPQR's default;
 *      if -2 < tol < 0, no tolerance is used; otherwise (tol >= 0), use tol as
 *      the tolerance: columns with 2-norm <= tol are treated as zero
 *
 * @return SEXP "SPQR" object with slots (Q, R, p, rank, Dim):
 *      Q: dgCMatrix; R: dgCMatrix [subject to change to dtCMatrix FIXME ?]
 *      p: integer: 0-based permutation (or length 0 <=> identity);
 *      rank: integer, the "revealed" rank;  Dim: integer, original matrix dim.
 */
SEXP dgCMatrix_SPQR(SEXP Ap, SEXP ordering, SEXP econ, SEXP tol)
{
/*  SEXP ans = PROTECT(allocVector(VECSXP, 4)); */
    SEXP ans = PROTECT(NEW_OBJECT(MAKE_CLASS("SPQR")));
    CHM_SP A = AS_CHM_SP(Ap), Q, R;
    SuiteSparse_long *E, rank; /* not always = int  FIXME (Windows_64 ?) */

    if ((rank = SuiteSparseQR_C_QR(asInteger(ordering),
                                   asReal(tol), /* originally had SPQR_DEFAULT_TOL */
                                   (SuiteSparse_long)asInteger(econ), /* originally had 0 */
                                   A, &Q, &R, &E, &cl)) == -1)
        error(_("SuiteSparseQR_C_QR returned an error code"));

    slot_dup(ans, Ap, Matrix_DimSym);
/*  SET_VECTOR_ELT(ans, 0,
                   chm_sparse_to_SEXP(Q, 0, 0, 0, "", R_NilValue)); */
    SET_SLOT(ans, install("Q"),
             chm_sparse_to_SEXP(Q, 0, 0, 0, "", R_NilValue));

    /* Also gives a dgCMatrix (not a dtC* *triangular*) :
     * may make sense if to be used in the "spqr_solve" routines .. ?? */
/*  SET_VECTOR_ELT(ans, 1,
                   chm_sparse_to_SEXP(R, 0, 0, 0, "", R_NilValue)); */
    SET_SLOT(ans, install("R"),
             chm_sparse_to_SEXP(R, 0, 0, 0, "", R_NilValue));
    cholmod_free_sparse(&R, &cl);
    cholmod_free_sparse(&Q, &cl);

    if (E) {
        int *Er;
        SET_SLOT(ans, install("p"), allocVector(INTSXP, A->ncol));
        Er = INTEGER(GET_SLOT(ans, install("p")));
        for (int i = 0; i < A->ncol; i++) Er[i] = (int) E[i];
        Free(E);
    } else
        SET_SLOT(ans, install("p"), allocVector(INTSXP, 0));
    SET_SLOT(ans, install("rank"), ScalarInteger((int) rank));
    UNPROTECT(1);
    return ans;
}
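/* Illustrative sketch only (never called): the 'tol' convention documented
 * above mirrors SPQR's own sentinel values.  Assuming the macros
 * SPQR_DEFAULT_TOL and SPQR_NO_TOL from SPQR/Include/SuiteSparseQR_definitions.h
 * (the header referenced in the comment above), the mapping can be made
 * explicit as follows: */
static double spqr_tol_sketch(double tol)
{
    if (tol <= -2.) return SPQR_DEFAULT_TOL; /* tol <= -2 : use SPQR's default */
    if (tol <   0.) return SPQR_NO_TOL;      /* -2 < tol < 0 : no tolerance   */
    return tol;                              /* tol >= 0 : user-supplied      */
}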
SEXP magma_dgeMatrix_LU_(SEXP x, Rboolean warn_sing)
{
#ifdef HIPLAR_WITH_MAGMA
    SEXP val = get_factors(x, "LU");
    int *dims, npiv, info;

    if (val != R_NilValue) {
        // R_ShowMessage("already in slot");
        /* nothing to do if it's there in 'factors' slot */
        return val;
    }

    dims = INTEGER(GET_SLOT(x, Matrix_DimSym));
    if (dims[0] < 1 || dims[1] < 1)
        error(_("Cannot factor a matrix with zero extents"));

    npiv = (dims[0] < dims[1]) ? dims[0] : dims[1];
    val = PROTECT(NEW_OBJECT(MAKE_CLASS("denseLU")));
    slot_dup(val, x, Matrix_xSym);
    slot_dup(val, x, Matrix_DimSym);
    double *h_R = REAL(GET_SLOT(val, Matrix_xSym));
    int *ipiv = INTEGER(ALLOC_SLOT(val, Matrix_permSym, INTSXP, npiv));

    if (GPUFlag == 0) {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: LU decomposition using dgetrf;");
#endif
        F77_CALL(dgetrf)(dims, dims + 1, h_R, dims, ipiv, &info);
    }
    else if (GPUFlag == 1 && Interface == 0) {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: LU decomposition using magma_dgetrf;");
#endif
        magma_dgetrf(dims[0], dims[1], h_R, dims[0], ipiv, &info);
    }
    else if (GPUFlag == 1 && Interface == 1) {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: LU decomposition using magma_dgetrf_gpu;");
#endif
        double *d_A;
        int N2 = dims[0] * dims[1];
        cublasStatus retStatus;

        cublasAlloc(N2, sizeof(double), (void**)&d_A);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Memory Allocation"));
        /********************************************/

        cublasSetVector(N2, sizeof(double), h_R, 1, d_A, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer to Device"));
        /********************************************/

        magma_dgetrf_gpu(dims[0], dims[1], d_A, dims[0], ipiv, &info);

        cublasGetVector(N2, sizeof(double), d_A, 1, h_R, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer from Device"));
        /********************************************/

        cublasFree(d_A);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error freeing data"));
        /********************************************/
    }
    else
        error(_("MAGMA/LAPACK/Interface Flag not defined correctly"));

    if (info < 0)
        error(_("Lapack routine %s returned error code %d"), "dgetrf", info);
    else if (info > 0 && warn_sing)
        warning(_("Exact singularity detected during LU decomposition: %s, i=%d."),
                "U[i,i]=0", info);
    UNPROTECT(1);
    return set_factors(x, val, "LU");
#endif
    return R_NilValue;
}
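/* Illustrative sketch (hypothetical helper, not part of HiPLAR): every GPU
 * branch in the magma_* functions here follows the same five-step pattern --
 * allocate device memory, copy the host data in, run the MAGMA kernel, copy
 * the result back, free -- with a cublasGetError() check after each CUBLAS
 * call.  Assuming the same legacy CUBLAS API used above, the skeleton is: */
static void gpu_roundtrip_sketch(int n, double *h_x,
                                 void (*kernel)(double *d_x, int n))
{
    double *d_x;
    cublasStatus retStatus;

    cublasAlloc(n, sizeof(double), (void**)&d_x);
    retStatus = cublasGetError();
    if (retStatus != CUBLAS_STATUS_SUCCESS)
        error(_("CUBLAS: Error in Memory Allocation"));

    cublasSetVector(n, sizeof(double), h_x, 1, d_x, 1);   /* host -> device */
    retStatus = cublasGetError();
    if (retStatus != CUBLAS_STATUS_SUCCESS)
        error(_("CUBLAS: Error in Data Transfer to Device"));

    kernel(d_x, n);                                       /* work on the device */

    cublasGetVector(n, sizeof(double), d_x, 1, h_x, 1);   /* device -> host */
    retStatus = cublasGetError();
    if (retStatus != CUBLAS_STATUS_SUCCESS)
        error(_("CUBLAS: Error in Data Transfer from Device"));

    cublasFree(d_x);
}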
SEXP magma_dgeMatrix_solve(SEXP a)
{
#ifdef HIPLAR_WITH_MAGMA
    /* compute the 1-norm of the matrix, which is needed later for the
       computation of the reciprocal condition number. */
    double aNorm = magma_get_norm(a, "1");

    /* the LU decomposition : */
    /* Given that we may be performing this operation on the GPU, an
     * optimisation could be added here where, if we call the LU solver, we do
     * not require the decomposition to be transferred back to the CPU.
     * This is TODO. */
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix"))),
        lu = magma_dgeMatrix_LU_(a, TRUE);
    int *dims = INTEGER(GET_SLOT(lu, Matrix_DimSym)),
        *pivot = INTEGER(GET_SLOT(lu, Matrix_permSym));

    /* prepare variables for the dgetri calls */
    double *x, tmp;
    int info, lwork = -1;

    if (dims[0] != dims[1])
        error(_("Solve requires a square matrix"));
    slot_dup(val, lu, Matrix_xSym);
    x = REAL(GET_SLOT(val, Matrix_xSym));
    slot_dup(val, lu, Matrix_DimSym);

    int N2 = dims[0] * dims[0];
    if (dims[0]) { /* the dimension is not zero */
        /* is the matrix *computationally* singular ? */
        double rcond;
        F77_CALL(dgecon)("1", dims, x, dims, &aNorm, &rcond,
                         (double *) R_alloc(4*dims[0], sizeof(double)),
                         (int *) R_alloc(dims[0], sizeof(int)), &info);
        if (info)
            error(_("error [%d] from Lapack 'dgecon()'"), info);
        if (rcond < DOUBLE_EPS)
            error(_("Lapack dgecon(): system computationally singular, reciprocal condition number = %g"),
                  rcond);

        /* only now try the inversion and check if the matrix is *exactly* singular: */
        /* This is also a workspace query; that is not an option in MAGMA. */
        F77_CALL(dgetri)(dims, x, dims, pivot, &tmp, &lwork, &info);
        lwork = (int) tmp;

        if (GPUFlag == 0) {
            F77_CALL(dgetri)(dims, x, dims, pivot,
                             (double *) R_alloc((size_t) lwork, sizeof(double)),
                             &lwork, &info);
#ifdef HIPLAR_DBG
            R_ShowMessage("DBG: Solve using LU using dgetri;");
#endif
        }
        else if (GPUFlag == 1) {
            double *d_x, *dwork;
            cublasStatus retStatus;
#ifdef HIPLAR_DBG
            R_ShowMessage("Solve using LU using magma_dgetri;");
#endif
            cublasAlloc(N2, sizeof(double), (void**)&d_x);
            //cublasAlloc(N2, sizeof(double), (void**)&dtmp);
            /* Error Checking */
            retStatus = cublasGetError();
            if (retStatus != CUBLAS_STATUS_SUCCESS)
                error(_("CUBLAS: Error in Memory Allocation on Device"));
            /********************************************/

            cublasSetVector(N2, sizeof(double), x, 1, d_x, 1);
            /* Error Checking */
            retStatus = cublasGetError();
            if (retStatus != CUBLAS_STATUS_SUCCESS)
                error(_("CUBLAS: Error in Data Transfer to Device"));
            /********************************************/

            lwork = dims[0] * magma_get_dgetri_nb(dims[0]);
            cublasAlloc(lwork, sizeof(double), (void**)&dwork);
            /* Error Checking */
            retStatus = cublasGetError();
            if (retStatus != CUBLAS_STATUS_SUCCESS)
                error(_("CUBLAS: Error in Memory Allocation on Device"));
            /********************************************/

            magma_dgetri_gpu(dims[0], d_x, dims[0], pivot, dwork, lwork, &info);

            cublasGetVector(N2, sizeof(double), d_x, 1, x, 1);
            /* Error Checking */
            retStatus = cublasGetError();
            if (retStatus != CUBLAS_STATUS_SUCCESS)
                error(_("CUBLAS: Error in Data Transfer from Device"));
            /********************************************/

            cublasFree(dwork);
            cublasFree(d_x);
            /* Error Checking */
            retStatus = cublasGetError();
            if (retStatus != CUBLAS_STATUS_SUCCESS)
                error(_("CUBLAS: Error freeing memory"));
            /********************************************/
        }
        else
            error(_("GPUFlag not set correctly"));

        if (info)
            error(_("Lapack routine dgetri: system is exactly singular"));
    }
    UNPROTECT(1);
    return val;
#endif
    return R_NilValue;
}
SEXP magma_dpoMatrix_dgeMatrix_solve(SEXP a, SEXP b)
{
#ifdef HIPLAR_WITH_MAGMA
    SEXP Chol = magma_dpoMatrix_chol(a),
        val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgeMatrix")));
    int *adims = INTEGER(GET_SLOT(a, Matrix_DimSym)),
        *bdims = INTEGER(GET_SLOT(b, Matrix_DimSym)),
        info;

    /* Checking Matrix Dimensions */
    if (adims[1] != bdims[0])
        error(_("Dimensions of system to be solved are inconsistent"));
    if (adims[0] < 1 || bdims[1] < 1)
        error(_("Cannot solve() for matrices with zero extents"));
    /* ****************************************** */

    SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
    slot_dup(val, b, Matrix_DimSym);
    slot_dup(val, b, Matrix_xSym);

    double *A = REAL(GET_SLOT(Chol, Matrix_xSym));
    double *B = REAL(GET_SLOT(val, Matrix_xSym));

    if (GPUFlag == 1) {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: Solving system of Ax = b, A = dpo, b = dge, using dpotrs_gpu;");
#endif
        double *d_A, *d_B;
        const char *uplo = uplo_P(Chol);
        magma_int_t NRHS = bdims[1];
        magma_int_t lda  = adims[1];
        magma_int_t ldb  = bdims[0];
        magma_int_t N    = adims[0];
        cublasStatus retStatus;

        /* if(uplo == "U")
               uplo = MagmaUpperStr;
           else if(uplo == "L")
               uplo = MagmaLowerStr;
           else
               uplo = MagmaUpperStr;
        */

        cublasAlloc(N * lda, sizeof(double), (void**)&d_A);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Memory Allocation"));
        /********************************************/

        cublasAlloc(N * NRHS, sizeof(double), (void**)&d_B);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Memory Allocation"));
        /********************************************/

        cublasSetVector(N * lda, sizeof(double), A, 1, d_A, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer to Device"));
        /********************************************/

        cublasSetVector(ldb * NRHS, sizeof(double), B, 1, d_B, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer to Device"));
        /********************************************/

        magma_dpotrs_gpu(uplo[0], N, NRHS, d_A, lda, d_B, ldb, &info);

        cublasGetVector(ldb * NRHS, sizeof(double), d_B, 1, B, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer from Device"));
        /********************************************/

        cublasFree(d_A);
        cublasFree(d_B);
    }
    else {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: Solving system of Ax = b, A = dpo, b = dge, using dpotrs;");
#endif
        F77_CALL(dpotrs)(uplo_P(Chol), adims, bdims + 1, A, adims, B, bdims, &info);
    }

    if (info) {
        if (info > 0)
            error(_("the leading minor of order %d is not positive definite"),
                  info);
        else /* should never happen! */
            error(_("Lapack routine %s returned error code %d"), "dpotrf", info);
    }
    UNPROTECT(1);
    return val;
#endif
    return R_NilValue;
}
SEXP magma_dpoMatrix_solve(SEXP x)
{
#ifdef HIPLAR_WITH_MAGMA
    SEXP Chol = magma_dpoMatrix_chol(x);
    SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dpoMatrix")));
    int *dims = INTEGER(GET_SLOT(x, Matrix_DimSym)), info;

    SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
    slot_dup(val, Chol, Matrix_uploSym);
    slot_dup(val, Chol, Matrix_xSym);
    slot_dup(val, Chol, Matrix_DimSym);
    SET_SLOT(val, Matrix_DimNamesSym,
             duplicate(GET_SLOT(x, Matrix_DimNamesSym)));

    double *A = REAL(GET_SLOT(val, Matrix_xSym));
    int N = *dims;
    int lda = N;
    const char *uplo = uplo_P(val);

    if (GPUFlag == 0) {
        F77_CALL(dpotri)(uplo_P(val), dims, A, dims, &info);
    }
    else if (GPUFlag == 1 && Interface == 0) {
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: Solving using magma_dpotri");
#endif
        magma_dpotri(uplo[0], N, A, lda, &info);
    }
    else if (GPUFlag == 1 && Interface == 1) {
        double *d_A;
        cublasStatus retStatus;

        cublasAlloc(N * lda, sizeof(double), (void**)&d_A);
#ifdef HIPLAR_DBG
        R_ShowMessage("DBG: Solving using magma_dpotri_gpu");
#endif
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Memory Allocation"));
        /********************************************/

        cublasSetVector(N * lda, sizeof(double), A, 1, d_A, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer to Device"));
        /********************************************/

        magma_dpotri_gpu(uplo[0], N, d_A, lda, &info);

        /* copy the inverse back into the 'x' slot data of 'val' */
        cublasGetVector(N * lda, sizeof(double), d_A, 1, A, 1);
        /* Error Checking */
        retStatus = cublasGetError();
        if (retStatus != CUBLAS_STATUS_SUCCESS)
            error(_("CUBLAS: Error in Data Transfer from Device"));
        /********************************************/

        cublasFree(d_A);
    }
    else
        error(_("MAGMA/LAPACK/Interface Flag not defined correctly"));

    if (info) {
        if (info > 0)
            error(_("the leading minor of order %d is not positive definite"),
                  info);
        else /* should never happen! */
            error(_("Lapack routine %s returned error code %d"), "dpotrf", info);
    }
    UNPROTECT(1);
    return val;
#endif
    return R_NilValue;
}
SEXP Tsparse_diagU2N(SEXP x)
{
    static const char *valid[] = {
        "dtTMatrix", /* 0 */
        "ltTMatrix", /* 1 */
        "ntTMatrix", /* 2 : no x slot */
        "ztTMatrix", /* 3 */
        ""};
/* #define xSXP(iTyp)  ((iTyp == 0) ? REALSXP : ((iTyp == 1) ? LGLSXP : /\* else *\/ CPLXSXP)); */
/* #define xTYPE(iTyp) ((iTyp == 0) ? double : ((iTyp == 1) ? int : /\* else *\/ Rcomplex)); */
    int ctype = Matrix_check_class_etc(x, valid);

    if (ctype < 0 || *diag_P(x) != 'U') {
        /* "trivially fast" when not triangular (<==> no 'diag' slot),
           or not *unit* triangular */
        return (x);
    }
    else { /* instead of going to Csparse -> Cholmod -> Csparse -> Tsparse,
              work directly: */
        int i, n = INTEGER(GET_SLOT(x, Matrix_DimSym))[0],
            nnz = length(GET_SLOT(x, Matrix_iSym)),
            new_n = nnz + n;
        SEXP ans = PROTECT(NEW_OBJECT(MAKE_CLASS(class_P(x))));
        int *islot = INTEGER(ALLOC_SLOT(ans, Matrix_iSym, INTSXP, new_n)),
            *jslot = INTEGER(ALLOC_SLOT(ans, Matrix_jSym, INTSXP, new_n));

        slot_dup(ans, x, Matrix_DimSym);
        SET_DimNames(ans, x);
        slot_dup(ans, x, Matrix_uploSym);
        SET_SLOT(ans, Matrix_diagSym, mkString("N"));

        /* Build the new i- and j- slots : first copy the current : */
        Memcpy(islot, INTEGER(GET_SLOT(x, Matrix_iSym)), nnz);
        Memcpy(jslot, INTEGER(GET_SLOT(x, Matrix_jSym)), nnz);
        /* then, add the new (i,j) slot entries: */
        for (i = 0; i < n; i++) {
            islot[i + nnz] = i;
            jslot[i + nnz] = i;
        }

        /* build the new x-slot : */
        switch(ctype) {
        case 0: { /* "d" */
            double *x_new = REAL(ALLOC_SLOT(ans, Matrix_xSym, REALSXP, new_n));
            Memcpy(x_new, REAL(GET_SLOT(x, Matrix_xSym)), nnz);
            for (i = 0; i < n; i++) /* add x[i,i] = 1. */
                x_new[i + nnz] = 1.;
            break;
        }
        case 1: { /* "l" */
            int *x_new = LOGICAL(ALLOC_SLOT(ans, Matrix_xSym, LGLSXP, new_n));
            Memcpy(x_new, LOGICAL(GET_SLOT(x, Matrix_xSym)), nnz);
            for (i = 0; i < n; i++) /* add x[i,i] = 1 (= TRUE) */
                x_new[i + nnz] = 1;
            break;
        }
        case 2: /* "n" */
            /* nothing to do here */
            break;
        case 3: { /* "z" */
            Rcomplex *x_new = COMPLEX(ALLOC_SLOT(ans, Matrix_xSym, CPLXSXP, new_n));
            Memcpy(x_new, COMPLEX(GET_SLOT(x, Matrix_xSym)), nnz);
            for (i = 0; i < n; i++) /* add x[i,i] = 1 + 0i */
                x_new[i + nnz] = (Rcomplex) {1., 0.};
            break;
        }
        } /* switch() */

        UNPROTECT(1);
        return ans;
    }
}
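/* Illustrative sketch (hypothetical, plain-C restatement of the idea above):
 * converting a unit-diagonal triangular triplet matrix to the explicit-diagonal
 * ("N") form just appends the n diagonal entries (i, i, 1) to the existing
 * (i, j, x) triplets; the destination arrays must have room for nnz + n
 * entries.  This is what Tsparse_diagU2N() does on the S4 slots for the
 * double ("d") case. */
static void diagU2N_triplets_sketch(int n, int nnz,
                                    const int i_in[], const int j_in[],
                                    const double x_in[],
                                    int i_out[], int j_out[], double x_out[])
{
    for (int k = 0; k < nnz; k++) {     /* copy the existing triplets */
        i_out[k] = i_in[k];
        j_out[k] = j_in[k];
        x_out[k] = x_in[k];
    }
    for (int k = 0; k < n; k++) {       /* append the unit diagonal */
        i_out[nnz + k] = k;
        j_out[nnz + k] = k;
        x_out[nnz + k] = 1.;
    }
}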