void XStatFree(SuperLUStat_t *stat) { if (stat->ops) { StatFree(stat); } stat->ops = NULL; }
void tlin::solve(SuperMatrix *A, SuperMatrix *BX, superlu_options_t *opt) { assert(A->nrow == A->ncol); int n = A->nrow; if (!opt) opt = &defaultOpt; SuperMatrix L, U; int *perm_c, *perm_r; perm_c = intMalloc(n); perm_r = intMalloc(n); SuperLUStat_t stat; StatInit(&stat); int result; dgssv(opt, A, perm_c, perm_r, &L, &U, BX, &stat, &result); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); StatFree(&stat); }
void tlin::factorize(SuperMatrix *A, SuperFactors *&F, superlu_options_t *opt) { assert(A->nrow == A->ncol); int n = A->nrow; if (!F) F = (SuperFactors *)SUPERLU_MALLOC(sizeof(SuperFactors)); if (!opt) opt = &defaultOpt; F->perm_c = intMalloc(n); get_perm_c(3, A, F->perm_c); SuperMatrix AC; int *etree = intMalloc(n); sp_preorder(opt, A, F->perm_c, etree, &AC); F->L = (SuperMatrix *)SUPERLU_MALLOC(sizeof(SuperMatrix)); F->U = (SuperMatrix *)SUPERLU_MALLOC(sizeof(SuperMatrix)); F->perm_r = intMalloc(n); SuperLUStat_t stat; StatInit(&stat); int result; dgstrf(opt, &AC, sp_ienv(1), sp_ienv(2), etree, NULL, 0, F->perm_c, F->perm_r, F->L, F->U, &stat, &result); StatFree(&stat); Destroy_CompCol_Permuted(&AC); SUPERLU_FREE(etree); if (result != 0) freeF(F), F = 0; }
PetscErrorCode MatDestroy_SuperLU(Mat A) { PetscErrorCode ierr; Mat_SuperLU *lu=(Mat_SuperLU*)A->spptr; PetscFunctionBegin; if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */ Destroy_SuperMatrix_Store(&lu->A); Destroy_SuperMatrix_Store(&lu->B); Destroy_SuperMatrix_Store(&lu->X); StatFree(&lu->stat); if (lu->lwork >= 0) { Destroy_SuperNode_Matrix(&lu->L); Destroy_CompCol_Matrix(&lu->U); } } if (lu) { ierr = PetscFree(lu->etree);CHKERRQ(ierr); ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); ierr = PetscFree(lu->R);CHKERRQ(ierr); ierr = PetscFree(lu->C);CHKERRQ(ierr); ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr); ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr); } ierr = PetscFree(A->spptr);CHKERRQ(ierr); /* clear composed functions */ ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",PETSC_NULL);CHKERRQ(ierr); ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",PETSC_NULL);CHKERRQ(ierr); ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); PetscFunctionReturn(0); }
int HYPRE_ParCSR_SuperLUSolve(HYPRE_Solver solver, HYPRE_ParCSRMatrix A, HYPRE_ParVector b, HYPRE_ParVector x ) { #ifdef HAVE_SUPERLU int nrows, i, info; double *bData, *xData; SuperMatrix B; SuperLUStat_t slu_stat; trans_t trans; HYPRE_SuperLU *sluPtr = (HYPRE_SuperLU *) solver; /* ---------------------------------------------------------------- */ /* make sure setup has been called */ /* ---------------------------------------------------------------- */ assert ( sluPtr != NULL ); if ( ! (sluPtr->factorized_) ) { printf("HYPRE_ParCSR_SuperLUSolve ERROR - not factorized yet.\n"); return -1; } /* ---------------------------------------------------------------- */ /* fetch right hand side and solution vector */ /* ---------------------------------------------------------------- */ xData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)x)); bData = hypre_VectorData(hypre_ParVectorLocalVector((hypre_ParVector *)b)); nrows = hypre_ParVectorGlobalSize((hypre_ParVector *)x); for (i = 0; i < nrows; i++) xData[i] = bData[i]; /* ---------------------------------------------------------------- */ /* solve */ /* ---------------------------------------------------------------- */ dCreate_Dense_Matrix(&B, nrows, 1, bData, nrows, SLU_DN, SLU_D,SLU_GE); /* ------------------------------------------------------------- * solve the problem * -----------------------------------------------------------*/ trans = NOTRANS; StatInit(&slu_stat); dgstrs (trans, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), sluPtr->permC_, sluPtr->permR_, &B, &slu_stat, &info); Destroy_SuperMatrix_Store(&B); StatFree(&slu_stat); return 0; #else printf("HYPRE_ParCSR_SuperLUSolve ERROR - SuperLU not enabled.\n"); *solver = (HYPRE_Solver) NULL; return -1; #endif }
static void __nlFree_SUPERLU(__NLContext *context) { Destroy_SuperNode_Matrix(&(context->slu.L)); Destroy_CompCol_Matrix(&(context->slu.U)); StatFree(&(context->slu.stat)); __NL_DELETE_ARRAY(context->slu.perm_r); __NL_DELETE_ARRAY(context->slu.perm_c); context->slu.alloc_slu = NL_FALSE; }
void tlin::solve(SuperFactors *F, SuperMatrix *BX, superlu_options_t *opt) { assert(F); if (!opt) opt = &defaultOpt; SuperLUStat_t stat; StatInit(&stat); int result; dgstrs(NOTRANS, F->L, F->U, F->perm_c, F->perm_r, BX, &stat, &result); StatFree(&stat); }
static void SparseFactor_dealloc(SparseFactor* self) { free(self->perm_c); free(self->perm_r); free(self->etree); if (self->AC.Store != NULL) Destroy_CompCol_Permuted(&self->AC); if (self->L.Store != NULL) Destroy_SuperNode_Matrix(&self->L); if (self->U.Store != NULL) Destroy_CompCol_Matrix(&self->U); StatFree(&self->stat); self->ob_type->tp_free((PyObject*)self); }
static void slm_dtor(void* context) { slm_t* mat = context; if (mat->cperm != NULL) { SUPERLU_FREE(mat->cperm); SUPERLU_FREE(mat->rperm); Destroy_SuperNode_Matrix(&mat->L); Destroy_CompCol_Matrix(&mat->U); } supermatrix_free(mat->A); Destroy_SuperMatrix_Store(&mat->rhs); polymec_free(mat->rhs_data); Destroy_SuperMatrix_Store(&mat->X); polymec_free(mat->X_data); polymec_free(mat->R); polymec_free(mat->C); StatFree(&mat->stat); if (mat->etree != NULL) polymec_free(mat->etree); mat->ilu_params = NULL; adj_graph_free(mat->sparsity); polymec_free(mat); }
int main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program CLINSOLX2. * * This example illustrates how to use CGSSVX to solve systems repeatedly * with the same sparsity pattern of matrix A. * In this case, the column permutation vector perm_c is computed once. * The following data structures will be reused in the subsequent call to * CGSSVX: perm_c, etree * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, A1, L, U; SuperMatrix B, B1, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; complex *a, *a1; int *asub, *xa, *asub1, *xa1; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, j, m, n, nnz; complex *rhsb, *rhsb1, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("DLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ creadhb(&m, &n, &nnz, &a, &asub, &xa); if ( !(a1 = complexMalloc(nnz)) ) ABORT("Malloc fails for a1[]."); if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[]."); if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[]."); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsb1 = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[]."); if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE); cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE); xact = complexMalloc(n * nrhs); ldx = n; cGenXtrue(n, nrhs, xact, ldx); cFillRHS(trans, nrhs, xact, ldx, &A, &B); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m]; if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B ------------------------------------------------------------*/ cgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("First system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); Destroy_CompCol_Matrix(&A); Destroy_Dense_Matrix(&B); if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1 ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A. ------------------------------------------------------------*/ options.Fact = SamePattern; StatInit(&stat); /* Initialize the statistics variables. */ cCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_C, SLU_GE); cCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_C, SLU_GE); cgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("\nSecond system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A1); Destroy_Dense_Matrix(&B1); Destroy_Dense_Matrix(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void c_fortran_zgssv_(int *iopt, int *n, int *nnz, int *nrhs, doublecomplex *values, int *rowind, int *colptr, doublecomplex *b, int *ldb, fptr *f_factors, /* a handle containing the address pointing to the factored matrices */ int *info) { /* * This routine can be called from Fortran. * * iopt (input) int * Specifies the operation: * = 1, performs LU decomposition for the first time * = 2, performs triangular solve * = 3, free all the storage in the end * * f_factors (input/output) fptr* * If iopt == 1, it is an output and contains the pointer pointing to * the structure of the factored matrices. * Otherwise, it it an input. * */ SuperMatrix A, AC, B; SuperMatrix *L, *U; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; /* column elimination tree */ SCformat *Lstore; NCformat *Ustore; int i, panel_size, permc_spec, relax; trans_t trans; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; factors_t *LUfactors; trans = TRANS; if ( *iopt == 1 ) { /* LU decomposition */ /* Set the default input options. */ set_default_options(&options); /* Initialize the statistics variables. */ StatInit(&stat); /* Adjust to 0-based indexing */ for (i = 0; i < *nnz; ++i) --rowind[i]; for (i = 0; i <= *n; ++i) --colptr[i]; zCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_Z, SLU_GE); L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = 0: natural ordering * permc_spec = 1: minimum degree on structure of A'*A * permc_spec = 2: minimum degree on structure of A'+A * permc_spec = 3: approximate minimum degree for unsymmetric matrices */ permc_spec = options.ColPerm; get_perm_c(permc_spec, &A, perm_c); sp_preorder(&options, &A, perm_c, etree, &AC); panel_size = sp_ienv(1); relax = sp_ienv(2); zgstrf(&options, &AC, relax, panel_size, etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); if ( *info == 0 ) { Lstore = (SCformat *) L->Store; Ustore = (NCformat *) U->Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); zQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("zgstrf() error returns INFO= %d\n", *info); if ( *info <= *n ) { /* factorization completes */ zQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } /* Restore to 1-based indexing */ for (i = 0; i < *nnz; ++i) ++rowind[i]; for (i = 0; i <= *n; ++i) ++colptr[i]; /* Save the LU factors in the factors handle */ LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); LUfactors->L = L; LUfactors->U = U; LUfactors->perm_c = perm_c; LUfactors->perm_r = perm_r; *f_factors = (fptr) LUfactors; /* Free un-wanted storage */ SUPERLU_FREE(etree); Destroy_SuperMatrix_Store(&A); Destroy_CompCol_Permuted(&AC); StatFree(&stat); } else if ( *iopt == 2 ) { /* Triangular solve */ /* Initialize the statistics variables. */ StatInit(&stat); /* Extract the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; L = LUfactors->L; U = LUfactors->U; perm_c = LUfactors->perm_c; perm_r = LUfactors->perm_r; zCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_Z, SLU_GE); /* Solve the system A*X=B, overwriting B with X. */ zgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); Destroy_SuperMatrix_Store(&B); StatFree(&stat); } else if ( *iopt == 3 ) { /* Free storage */ /* Free the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; SUPERLU_FREE (LUfactors->perm_r); SUPERLU_FREE (LUfactors->perm_c); Destroy_SuperNode_Matrix(LUfactors->L); Destroy_CompCol_Matrix(LUfactors->U); SUPERLU_FREE (LUfactors->L); SUPERLU_FREE (LUfactors->U); SUPERLU_FREE (LUfactors); } else { fprintf(stderr,"Invalid iopt=%d passed to c_fortran_zgssv()\n",*iopt); exit(-1); } }
static PyObject *SuperLU_solve(SuperLUObject * self, PyObject * args, PyObject * kwds) { PyArrayObject *b, *x = NULL; SuperMatrix B = { 0 }; #ifndef NPY_PY3K char itrans = 'N'; #else int itrans = 'N'; #endif int info; trans_t trans; SuperLUStat_t stat = { 0 }; static char *kwlist[] = { "rhs", "trans", NULL }; if (!CHECK_SLU_TYPE(self->type)) { PyErr_SetString(PyExc_ValueError, "unsupported data type"); return NULL; } #ifndef NPY_PY3K if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|c", kwlist, &PyArray_Type, &b, &itrans)) #else if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!|C", kwlist, &PyArray_Type, &b, &itrans)) #endif return NULL; /* solve transposed system: matrix was passed row-wise instead of * column-wise */ if (itrans == 'n' || itrans == 'N') trans = NOTRANS; else if (itrans == 't' || itrans == 'T') trans = TRANS; else if (itrans == 'h' || itrans == 'H') trans = CONJ; else { PyErr_SetString(PyExc_ValueError, "trans must be N, T, or H"); return NULL; } x = (PyArrayObject*)PyArray_FROMANY( (PyObject*)b, self->type, 1, 2, NPY_F_CONTIGUOUS | NPY_ENSURECOPY); if (x == NULL) { goto fail; } if (x->dimensions[0] != self->n) { PyErr_SetString(PyExc_ValueError, "b is of incompatible size"); goto fail; } if (setjmp(_superlu_py_jmpbuf)) goto fail; if (DenseSuper_from_Numeric(&B, (PyObject *)x)) goto fail; StatInit(&stat); /* Solve the system, overwriting vector x. */ gstrs(self->type, trans, &self->L, &self->U, self->perm_c, self->perm_r, &B, &stat, &info); if (info) { PyErr_SetString(PyExc_SystemError, "gstrs was called with invalid arguments"); goto fail; } /* free memory */ Destroy_SuperMatrix_Store(&B); StatFree(&stat); return (PyObject *) x; fail: XDestroy_SuperMatrix_Store(&B); XStatFree(&stat); Py_XDECREF(x); return NULL; }
void dgssv(SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * Purpose * ======= * * DGSSV solves the system of linear equations A*X=B, using the * LU factorization from DGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc * is a permutation matrix. For more details of this step, * see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the * above algorithm to the transpose of A: * * 2.1. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE. * In the future, more general A may be handled. * * perm_c (input/output) int* * If A->Stype = SLU_NC, column permutation vector of size A->ncol * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = SLU_NR, column permutation vector of size A->nrow * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ double t1; /* Temporary time */ char refact[1], trans[1]; DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ double diag_pivot_thresh = 1.0; double drop_tol = 0; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ double *utime; extern SuperLUStat_t SuperLUStat; /* Test the input parameters ... */ *info = 0; Bstore = B->Store; if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -1; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -6; if ( *info != 0 ) { i = -(*info); xerbla_("dgssv", &i); return; } *refact = 'N'; *trans = 'N'; panel_size = sp_ienv(1); relax = sp_ienv(2); StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); *trans = 'T'; } else if ( A->Stype == SLU_NC ) AA = A; etree = intMalloc(A->ncol); t1 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t1; /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t1 = SuperLU_timer_(); /* Compute the LU factorization of A. */ dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, NULL, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t1; t1 = SuperLU_timer_(); if ( *info == 0 ) { /* Solve the system A*X=B, overwriting B with X. */ dgstrs (trans, L, U, perm_r, perm_c, B, info); } utime[SOLVE] = SuperLU_timer_() - t1; SUPERLU_FREE (etree); Destroy_CompCol_Permuted(&AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /*PrintStat( &SuperLUStat );*/ StatFree(); }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_D0 runs a small 5 by 5 example of the use of SUPER_LU. Modified: 23 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. */ { double *a; SuperMatrix A; int *asub; SuperMatrix B; int i; int info; SuperMatrix L; int m; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; int permc_spec; double *rhs; double sol[5]; SuperLUStat_t stat; SuperMatrix U; int *xa; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Simple 5 by 5 example of SUPER_LU solver.\n" ); /* Initialize parameters. */ m = 5; n = 5; nnz = 12; /* Set aside space for the arrays. */ a = doubleMalloc ( nnz ); if ( !a ) { ABORT ( "Malloc fails for a[]." ); } asub = intMalloc ( nnz ); if ( !asub ) { ABORT ( "Malloc fails for asub[]." ); } xa = intMalloc ( n+1 ); if ( !xa ) { ABORT ( "Malloc fails for xa[]." ); } /* Initialize matrix A. */ a[0] = 19.0; a[1] = 12.0; a[2] = 12.0; a[3] = 21.0; a[4] = 12.0; a[5] = 12.0; a[6] = 21.0; a[7] = 16.0; a[8] = 21.0; a[9] = 5.0; a[10]= 21.0; a[11]= 18.0; asub[0] = 0; asub[1] = 1; asub[2] = 4; asub[3] = 1; asub[4] = 2; asub[5] = 4; asub[6] = 0; asub[7] = 2; asub[8] = 0; asub[9] = 3; asub[10]= 3; asub[11]= 4; xa[0] = 0; xa[1] = 3; xa[2] = 6; xa[3] = 8; xa[4] = 10; xa[5] = 12; sol[0] = -0.031250000; sol[1] = 0.065476190; sol[2] = 0.013392857; sol[3] = 0.062500000; sol[4] = 0.032738095; /* Create matrix A in the format expected by SuperLU. */ dCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE ); /* Create the right-hand side matrix B. */ nrhs = 1; rhs = doubleMalloc ( m * nrhs ); if ( !rhs ) { ABORT("Malloc fails for rhs[]."); } for ( i = 0; i < m; i++ ) { rhs[i] = 1.0; } dCreate_Dense_Matrix ( &B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE ); /* Set up the arrays for the permutations. */ perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } /* Set the default input options, and then adjust some of them. */ set_default_options ( &options ); options.ColPerm = NATURAL; /* Initialize the statistics variables. */ StatInit ( &stat ); /* Factor the matrix and solve the linear system. */ dgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); /* Print some of the results. */ dPrint_CompCol_Matrix ( "Matrix A", &A ); dPrint_SuperNode_Matrix ( "Factor L", &L ); dPrint_CompCol_Matrix ( "Factor U", &U ); dPrint_Dense_Matrix ( "Solution X", &B ); printf ( "\n" ); printf ( " The exact solution:\n" ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( "%d %f\n", i, sol[i] ); } printf ( "\n" ); print_int_vec ( "perm_r", m, perm_r ); /* De-allocate storage. */ SUPERLU_FREE ( rhs ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); StatFree ( &stat ); printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Normal end of execution.\n" ); return 0; }
static PyObject * Py_gssv(PyObject *self, PyObject *args, PyObject *kwdict) { PyObject *Py_B=NULL, *Py_X=NULL; PyArrayObject *nzvals=NULL; PyArrayObject *colind=NULL, *rowptr=NULL; int N, nnz; int info; int csc=0; int *perm_r=NULL, *perm_c=NULL; SuperMatrix A, B, L, U; superlu_options_t options; SuperLUStat_t stat; PyObject *option_dict = NULL; int type; int ssv_finished = 0; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "options",NULL}; /* Get input arguments */ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|iO", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &option_dict)) { return NULL; } if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); return NULL; } type = PyArray_TYPE(nzvals); if (!CHECK_SLU_TYPE(type)) { PyErr_SetString(PyExc_TypeError, "nzvals is not of a type supported by SuperLU"); return NULL; } if (!set_superlu_options_from_dict(&options, 0, option_dict, NULL, NULL)) { return NULL; } /* Create Space for output */ Py_X = PyArray_CopyFromObject(Py_B, type, 1, 2); if (Py_X == NULL) return NULL; if (csc) { if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, type)) { Py_DECREF(Py_X); return NULL; } } else { if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, type)) { Py_DECREF(Py_X); return NULL; } } if (DenseSuper_from_Numeric(&B, Py_X)) { Destroy_SuperMatrix_Store(&A); Py_DECREF(Py_X); return NULL; } /* B and Py_X share same data now but Py_X "owns" it */ /* Setup options */ if (setjmp(_superlu_py_jmpbuf)) { goto fail; } else { perm_c = intMalloc(N); perm_r = intMalloc(N); StatInit(&stat); /* Compute direct inverse of sparse Matrix */ gssv(type, &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); } ssv_finished = 1; SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); return Py_BuildValue("Ni", Py_X, info); fail: SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); /* holds just a pointer to the data */ Destroy_SuperMatrix_Store(&B); if (ssv_finished) { /* Avoid trying to free partially initialized matrices; might leak some memory, but avoids a crash */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } StatFree(&stat); Py_XDECREF(Py_X); return NULL; }
PyObject *newSuperLUObject(SuperMatrix * A, PyObject * option_dict, int intype, int ilu) { /* A must be in SLU_NC format used by the factorization routine. */ SuperLUObject *self; SuperMatrix AC = { 0 }; /* Matrix postmultiplied by Pc */ int lwork = 0; int *etree = NULL; int info; int n; superlu_options_t options; SuperLUStat_t stat = { 0 }; int panel_size, relax; n = A->ncol; if (!set_superlu_options_from_dict(&options, ilu, option_dict, &panel_size, &relax)) { return NULL; } /* Create SLUObject */ self = PyObject_New(SuperLUObject, &SuperLUType); if (self == NULL) return PyErr_NoMemory(); self->m = A->nrow; self->n = n; self->perm_r = NULL; self->perm_c = NULL; self->L.Store = NULL; self->U.Store = NULL; self->cached_U = NULL; self->cached_L = NULL; self->type = intype; if (setjmp(_superlu_py_jmpbuf)) goto fail; /* Calculate and apply minimum degree ordering */ etree = intMalloc(n); self->perm_r = intMalloc(n); self->perm_c = intMalloc(n); StatInit(&stat); get_perm_c(options.ColPerm, A, self->perm_c); /* calc column permutation */ sp_preorder(&options, A, self->perm_c, etree, &AC); /* apply column * permutation */ /* Perform factorization */ if (!CHECK_SLU_TYPE(SLU_TYPECODE_TO_NPY(A->Dtype))) { PyErr_SetString(PyExc_ValueError, "Invalid type in SuperMatrix."); goto fail; } if (ilu) { gsitrf(SLU_TYPECODE_TO_NPY(A->Dtype), &options, &AC, relax, panel_size, etree, NULL, lwork, self->perm_c, self->perm_r, &self->L, &self->U, &stat, &info); } else { gstrf(SLU_TYPECODE_TO_NPY(A->Dtype), &options, &AC, relax, panel_size, etree, NULL, lwork, self->perm_c, self->perm_r, &self->L, &self->U, &stat, &info); } if (info) { if (info < 0) PyErr_SetString(PyExc_SystemError, "gstrf was called with invalid arguments"); else { if (info <= n) PyErr_SetString(PyExc_RuntimeError, "Factor is exactly singular"); else PyErr_NoMemory(); } goto fail; } /* free memory */ SUPERLU_FREE(etree); Destroy_CompCol_Permuted(&AC); StatFree(&stat); return (PyObject *) self; fail: SUPERLU_FREE(etree); XDestroy_CompCol_Permuted(&AC); XStatFree(&stat); Py_DECREF(self); return NULL; }
void psgssvx(int nprocs, superlumt_options_t *superlumt_options, SuperMatrix *A, int *perm_c, int *perm_r, equed_t *equed, float *R, float *C, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, float *rcond, float *ferr, float *berr, superlu_memusage_t *superlu_memusage, int *info) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * psgssvx() solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from sgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS: diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation matrix * that usually preserves sparsity. * For more details of this step, see ssp_colorder.c. * * 1.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*A*Pc = L*U, with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of A is used to * estimate the condition number of the matrix A. If the reciprocal of * the condition number is less than machine precision, * info = A->ncol+1 is returned as a warning, but the routine still * goes on to solve for X and computes error bounds as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS:diag(R)*A'*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see ssp_colorder.c. * * 2.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*transpose(A)*Pc = L*U, with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of transpose(A) * is used to estimate the condition number of the matrix A. * If the reciprocal of the condition number is less than machine * precision, info = A->nrow+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by psgstrf(). There is a single thread of * control to call psgstrf(), and all threads spawned by psgstrf() * are terminated before returning from psgstrf(). * * superlumt_options (input) superlumt_options_t* * The structure defines the input parameters and data structure * to control how the LU factorization will be performed. * The following fields should be defined for this structure: * * o fact (fact_t) * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = FACTORED: On entry, L, U, perm_r and perm_c contain the * factored form of A. If equed is not NOEQUIL, the matrix A has * been equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = DOFACT: The matrix A will be factored, and the factors will be * stored in L and U. * = EQUILIBRATE: The matrix A will be equilibrated if necessary, * then factored into L and U. * * o trans (trans_t) * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Transpose) * * o refact (yes_no_t) * Specifies whether this is first time or subsequent factorization. * = NO: this factorization is treated as the first one; * = YES: it means that a factorization was performed prior to this * one. Therefore, this factorization will re-use some * existing data structures, such as L and U storage, column * elimination tree, and the symbolic information of the * Householder matrix. * * o panel_size (int) * A panel consists of at most panel_size consecutive columns. * * o relax (int) * To control degree of relaxing supernodes. If the number * of nodes (columns) in a subtree of the elimination tree is less * than relax, this subtree is considered as one supernode, * regardless of the row structures of those columns. * * o diag_pivot_thresh (float) * Diagonal pivoting threshold. At step j of the Gaussian * elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * use A_jj as pivot, else use A_ij with maximum magnitude. * 0 <= diag_pivot_thresh <= 1. The default value is 1, * corresponding to partial pivoting. * * o usepr (yes_no_t) * Whether the pivoting will use perm_r specified by the user. * = YES: use perm_r; perm_r is input, unchanged on exit. * = NO: perm_r is determined by partial pivoting, and is output. * * o drop_tol (double) (NOT IMPLEMENTED) * Drop tolerance parameter. At step j of the Gaussian elimination, * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. * 0 <= drop_tol <= 1. The default value of drop_tol is 0, * corresponding to not dropping any entry. * * o work (void*) of size lwork * User-supplied work space and space for the output data structures. * Not referenced if lwork = 0; * * o lwork (int) * Specifies the length of work array. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * superlu_memusage->total_needed; no other side effects. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR, Dtype = _D, Mtype = GE. In the future, * more general A will be handled. * * On entry, If superlumt_options->fact = FACTORED and equed is not * NOEQUIL, then A must have been equilibrated by the scaling factors * in R and/or C. On exit, A is not modified * if superlumt_options->fact = FACTORED or DOFACT, or * if superlumt_options->fact = EQUILIBRATE and equed = NOEQUIL. * * On exit, if superlumt_options->fact = EQUILIBRATE and equed is not * NOEQUIL, A is scaled as follows: * If A->Stype = NC: * equed = ROW: A := diag(R) * A * equed = COL: A := A * diag(C) * equed = BOTH: A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = ROW: transpose(A) := diag(R) * transpose(A) * equed = COL: transpose(A) := transpose(A) * diag(C) * equed = BOTH: transpose(A) := diag(R) * transpose(A) * diag(C). * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If superlumt_options->usepr = NO, perm_r is output argument; * If superlumt_options->usepr = YES, the pivoting routine will try * to use the input perm_r, unless a certain threshold criterion * is violated. In that case, perm_r is overwritten by a new * permutation determined by partial pivoting or diagonal * threshold pivoting. * * equed (input/output) equed_t* * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). * = COL: Column equilibration, i.e., A was postmultiplied by diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If superlumt_options->fact = FACTORED, equed is an input argument, * otherwise it is an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = ROW or BOTH, A (if A->Stype = NC) or transpose(A) * (if A->Stype = NR) is multiplied on the left by diag(R). * If equed = NOEQUIL or COL, R is not accessed. * If fact = FACTORED, R is an input argument; otherwise, R is output. * If fact = FACTORED and equed = ROW or BOTH, each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = COL or BOTH, A (if A->Stype = NC) or trnspose(A) * (if A->Stype = NR) is multiplied on the right by diag(C). * If equed = NOEQUIL or ROW, C is not accessed. * If fact = FACTORED, C is an input argument; otherwise, C is output. * If fact = FACTORED and equed = COL or BOTH, each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = NOEQUIL, B is not modified; otherwise * if A->Stype = NC: * if trans = NOTRANS and equed = ROW or BOTH, B is overwritten * by diag(R)*B; * if trans = TRANS or CONJ and equed = COL of BOTH, B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = NOTRANS and equed = COL or BOTH, B is overwritten * by diag(C)*B; * if trans = TRANS or CONJ and equed = ROW of BOTH, B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = _D, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not NOEQUIL, and the solution to the * equilibrated system is inv(diag(C))*X if trans = NOTRANS and * equed = COL or BOTH, or inv(diag(R))*X if trans = TRANS or CONJ * and equed = ROW or BOTH. * * recip_pivot_growth (output) float* * The reciprocal pivot growth factor computed as * max_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) ). * If recip_pivot_growth is much less than 1, the stability of the * LU factorization could be poor. * * rcond (output) float* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * ferr (output) float*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * berr (output) float*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * superlu_memusage (output) superlu_memusage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ NCformat *Astore; DNformat *Bstore, *Xstore; float *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, dofact, notran, rowequ; char norm[1]; trans_t trant; int i, j, info1; float amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int n, relax, panel_size; Gstat_t Gstat; double t0; /* temporary time */ double *utime; flops_t *ops, flopcnt; /* External functions */ extern float slangs(char *, SuperMatrix *); extern double slamch_(char *); Astore = A->Store; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; n = A->ncol; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; superlumt_options->perm_c = perm_c; superlumt_options->perm_r = perm_r; *info = 0; dofact = (superlumt_options->fact == DOFACT); equil = (superlumt_options->fact == EQUILIBRATE); notran = (superlumt_options->trans == NOTRANS); if (dofact || equil) { *equed = NOEQUIL; rowequ = FALSE; colequ = FALSE; } else { rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); smlnum = slamch_("Safe minimum"); bignum = 1. / smlnum; } /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ if ( nprocs <= 0 ) *info = -1; else if ( (!dofact && !equil && (superlumt_options->fact != FACTORED)) || (!notran && (superlumt_options->trans != TRANS) && (superlumt_options->trans != CONJ)) || (superlumt_options->refact != YES && superlumt_options->refact != NO) || (superlumt_options->usepr != YES && superlumt_options->usepr != NO) || superlumt_options->lwork < -1 ) *info = -2; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_S || A->Mtype != SLU_GE ) *info = -3; else if ((superlumt_options->fact == FACTORED) && !(rowequ || colequ || (*equed == NOEQUIL))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE ) *info = -11; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE ) *info = -12; } } if (*info != 0) { i = -(*info); xerbla_("psgssvx", &i); return; } /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ panel_size = superlumt_options->panel_size; relax = superlumt_options->relax; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == NC */ trant = superlumt_options->trans; AA = A; } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ slaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); } utime[EQUIL] = SuperLU_timer_() - t0; } /* ------------------------------------------------------------ Scale the right hand side. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } /* ------------------------------------------------------------ Perform the LU factorization. ------------------------------------------------------------*/ if ( dofact || equil ) { /* Obtain column etree, the column count (colcnt_h) and supernode partition (part_super_h) for the Householder matrix. */ t0 = SuperLU_timer_(); sp_colorder(AA, perm_c, superlumt_options, &AC); utime[ETREE] = SuperLU_timer_() - t0; #if ( PRNTlevel >= 2 ) printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); #endif /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); psgstrf(superlumt_options, &AC, perm_r, L, U, &Gstat, info); utime[FACT] = SuperLU_timer_() - t0; flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; if ( superlumt_options->lwork == -1 ) { superlu_memusage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U); } } else { /* ------------------------------------------------------------ Compute the reciprocal pivot growth factor *recip_pivot_growth. ------------------------------------------------------------*/ *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U); /* ------------------------------------------------------------ Estimate the reciprocal of the condition number of A. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = slangs(norm, AA); sgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); sgstrs(trant, L, U, perm_r, perm_c, X, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t0; ops[SOLVE] = ops[TRISOLVE]; /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); sgsrfs(trant, AA, L, U, perm_r, perm_c, *equed, R, C, B, X, ferr, berr, &Gstat, info); utime[REFINE] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Transform the solution matrix X to a solution of the original system. ------------------------------------------------------------*/ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision.*/ if ( *rcond < slamch_("E") ) *info = A->ncol + 1; } superlu_sQuerySpace(nprocs, L, U, panel_size, superlu_memusage); /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ if ( superlumt_options->refact == NO ) { SUPERLU_FREE(superlumt_options->etree); SUPERLU_FREE(superlumt_options->colcnt_h); SUPERLU_FREE(superlumt_options->part_super_h); } if ( dofact || equil ) { Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif PrintStat(&Gstat); StatFree(&Gstat); }
int main(int argc, char *argv[]) { char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; float *a; int *asub, *xa; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; float *rhsb, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; /* Add more functionalities that the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ options.IterRefine = SLU_SINGLE; /* Perform single-precision refinement */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("SLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ sreadhb(&m, &n, &nnz, &a, &asub, &xa); sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); ldx = n; sGenXtrue(n, nrhs, xact, ldx); sFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using dgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("sgssvx(): info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ float *sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); if ( options.IterRefine != NOREFINE ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
SolveSuperLU (const MatriceMorse<R> &AA, int strategy, double ttgv, double epsilon, double pivot, double pivot_sym, string ¶m_char, KN<long> pperm_r, KN<long> pperm_c): eps(epsilon), epsr(0), tgv(ttgv), etree(0), string_option(param_char), perm_r(pperm_r), perm_c(pperm_c), RR(0), CC(0), tol_pivot_sym(pivot_sym), tol_pivot(pivot) { SuperMatrix B, X; SuperLUStat_t stat; void *work = 0; int info, lwork = 0/*, nrhs = 1*/; int i; double ferr[1]; double berr[1]; double rpg, rcond; R *bb; R *xx; A.Store = 0; B.Store = 0; X.Store = 0; L.Store = 0; U.Store = 0; int status; n = AA.n; m = AA.m; nnz = AA.nbcoef; arow = AA.a; asubrow = AA.cl; xarow = AA.lg; /* FreeFem++ use Morse Format */ // FFCS - "this->" required by g++ 4.7 this->CompRow_to_CompCol(m, n, nnz, arow, asubrow, xarow, &a, &asub, &xa); /* Defaults */ lwork = 0; // nrhs = 0; /* Set the default values for options argument: * options.Fact = DOFACT; * options.Equil = YES; * options.ColPerm = COLAMD; * options.DiagPivotThresh = 1.0; * options.Trans = NOTRANS; * options.IterRefine = NOREFINE; * options.SymmetricMode = NO; * options.PivotGrowth = NO; * options.ConditionNumber = NO; * options.PrintStat = YES; */ set_default_options(&options); printf(".. default options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); if (!string_option.empty()) {read_options_freefem(string_option, &options);} printf(".. options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); Dtype_t R_SLU = SuperLUDriver<R>::R_SLU_T(); // FFCS - "this->" required by g++ 4.7 this->Create_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, R_SLU, SLU_GE); this->Create_Dense_Matrix(&B, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); this->Create_Dense_Matrix(&X, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); if (etree.size() == 0) {etree.resize(n);} if (perm_r.size() == 0) {perm_r.resize(n);} if (perm_c.size() == 0) {perm_c.resize(n);} if (!(RR = new double[n])) { ABORT("SUPERLU_MALLOC fails for R[]."); } for (int ii = 0; ii < n; ii++) { RR[ii] = 1.; } if (!(CC = new double[m])) { ABORT("SUPERLU_MALLOC fails for C[]."); } for (int ii = 0; ii < n; ii++) { CC[ii] = 1.; } ferr[0] = 0; berr[0] = 0; /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ SuperLUDriver<R>::gssvx(&options, &A, perm_c, perm_r, etree, equed, RR, CC, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if (verbosity > 2) { printf("LU factorization: dgssvx() returns info %d\n", info); } if (verbosity > 3) { if (info == 0 || info == n + 1) { if (options.PivotGrowth) {printf("Recip. pivot growth = %e\n", rpg);} if (options.ConditionNumber) { printf("Recip. condition number = %e\n", rcond); } Lstore = (SCformat *)L.Store; Ustore = (NCformat *)U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu / 1e6, mem_usage.total_needed / 1e6, stat.expansions ); fflush(stdout); } else if (info > 0 && lwork == -1) { printf("** Estimated memory: %d bytes\n", info - n); } } if (verbosity > 5) {StatPrint(&stat);} StatFree(&stat); if (B.Store) {Destroy_SuperMatrix_Store(&B);} if (X.Store) {Destroy_SuperMatrix_Store(&X);} options.Fact = FACTORED;/* Indicate the factored form of A is supplied. */ }
void dgssvx(char *fact, char *trans, char *refact, SuperMatrix *A, factor_param_t *factor_params, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, mem_usage_t *mem_usage, int *info ) { /* * Purpose * ======= * * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if trans='N') * or diag(C)*B (if trans = 'T' or 'C'). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation * matrix that usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 1.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if fact = 'E') as Pr*A*Pc = L*U, * with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form of * A is used to estimate the condition number of the matrix A. If * the reciprocal of the condition number is less than machine * precision, info = A->ncol+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the transpose of A: * * 2.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A'*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 2.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * transpose(A) (after equilibration if fact = 'E') as * Pr*transpose(A)*Pc = L*U with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form * of transpose(A) is used to estimate the condition number of the * matrix A. If the reciprocal of the condition number * is less than machine precision, info = A->nrow+1 is returned as * a warning, but the routine still goes on to solve for X and * computes error bounds as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * fact (input) char* * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = 'F': On entry, L, U, perm_r and perm_c contain the factored * form of A. If equed is not 'N', the matrix A has been * equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = 'N': The matrix A will be factored, and the factors will be * stored in L and U. * = 'E': The matrix A will be equilibrated if necessary, then * factored into L and U. * * trans (input) char* * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Transpose) * * refact (input) char* * Specifies whether we want to re-factor the matrix. * = 'N': Factor the matrix A. * = 'Y': Matrix A was factored before, now we want to re-factor * matrix A with perm_r and etree as inputs. Use * the same storage for the L\U factors previously allocated, * expand it if necessary. User should insure to use the same * memory model. In this case, perm_r may be modified due to * different pivoting determined by diagonal threshold. * If fact = 'F', then refact is not accessed. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of the linear equations is A->nrow. Currently, the type of A can be: * Stype = NC or NR, Dtype = D_, Mtype = GE. In the future, * more general A can be handled. * * On entry, If fact = 'F' and equed is not 'N', then A must have * been equilibrated by the scaling factors in R and/or C. * A is not modified if fact = 'F' or 'N', or if fact = 'E' and * equed = 'N' on exit. * * On exit, if fact = 'E' and equed is not 'N', A is scaled as follows: * If A->Stype = NC: * equed = 'R': A := diag(R) * A * equed = 'C': A := A * diag(C) * equed = 'B': A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = 'R': transpose(A) := diag(R) * transpose(A) * equed = 'C': transpose(A) := transpose(A) * diag(C) * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). * * factor_params (input) factor_param_t* * The structure defines the input scalar parameters, consisting of * the following fields. If factor_params = NULL, the default * values are used for all the fields; otherwise, the values * are given by the user. * - panel_size (int): Panel size. A panel consists of at most * panel_size consecutive columns. If panel_size = -1, use * default value 8. * - relax (int): To control degree of relaxing supernodes. If the * number of nodes (columns) in a subtree of the elimination * tree is less than relax, this subtree is considered as one * supernode, regardless of the row structures of those columns. * If relax = -1, use default value 8. * - diag_pivot_thresh (double): Diagonal pivoting threshold. * At step j of the Gaussian elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * then use A_jj as pivot. 0 <= diag_pivot_thresh <= 1. * If diag_pivot_thresh = -1, use default value 1.0, * which corresponds to standard partial pivoting. * - drop_tol (double): Drop tolerance threshold. (NOT IMPLEMENTED) * At step j of the Gaussian elimination, if * abs(A_ij)/(max_i abs(A_ij)) < drop_tol, * then drop entry A_ij. 0 <= drop_tol <= 1. * If drop_tol = -1, use default value 0.0, which corresponds to * standard Gaussian elimination. * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If refact is not 'Y', perm_r is output argument; * If refact = 'Y', the pivoting routine will try to use the input * perm_r, unless a certain threshold criterion is violated. * In that case, perm_r is overwritten by a new permutation * determined by partial pivoting or diagonal threshold pivoting. * * etree (input/output) int*, dimension (A->ncol) * Elimination tree of Pc'*A'*A*Pc. * If fact is not 'F' and refact = 'Y', etree is an input argument, * otherwise it is an output argument. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * equed (input/output) char* * Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If fact = 'F', equed is an input argument, otherwise it is * an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = 'R' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the left by diag(R). * If equed = 'N' or 'C', R is not accessed. * If fact = 'F', R is an input argument; otherwise, R is output. * If fact = 'F' and equed = 'R' or 'B', each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = 'C' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the right by diag(C). * If equed = 'N' or 'R', C is not accessed. * If fact = 'F', C is an input argument; otherwise, C is output. * If fact = 'F' and equed = 'C' or 'B', each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = D_, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NC, Dtype = D_, Mtype = TRU. * * work (workspace/output) void*, size (lwork) (in bytes) * User supplied workspace, should be large enough * to hold data structures for factors L and U. * On exit, if fact is not 'F', L and U point to this array. * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * mem_usage->total_needed; no other side effects. * * See argument 'mem_usage' for memory usage statistics. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = D_, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = 'N', B is not modified; otherwise * if A->Stype = NC: * if trans = 'N' and equed = 'R' or 'B', B is overwritten by * diag(R)*B; * if trans = 'T' or 'C' and equed = 'C' of 'B', B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = 'N' and equed = 'C' or 'B', B is overwritten by * diag(C)*B; * if trans = 'T' or 'C' and equed = 'R' of 'B', B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = D_, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not 'N', and the solution to the equilibrated * system is inv(diag(C))*X if trans = 'N' and equed = 'C' or 'B', * or inv(diag(R))*X if trans = 'T' or 'C' and equed = 'R' or 'B'. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). * The infinity norm is used. If recip_pivot_growth is much less * than 1, the stability of the LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * mem_usage (output) mem_usage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ; char trant[1], norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double diag_pivot_thresh, drop_tol; double t0; /* temporary time */ double *utime; extern SuperLUStat_t SuperLUStat; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double dlamch_(char *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; #if 0 printf("dgssvx: fact=%c, trans=%c, refact=%c, equed=%c\n", *fact, *trans, *refact, *equed); #endif *info = 0; nofact = lsame_(fact, "N"); equil = lsame_(fact, "E"); notran = lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); smlnum = dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters */ if (!nofact && !equil && !lsame_(fact, "F")) *info = -1; else if (!notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; else if ( !(lsame_(refact,"Y") || lsame_(refact, "N")) ) *info = -3; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != NC && A->Stype != NR) || A->Dtype != D_ || A->Mtype != GE ) *info = -4; else if (lsame_(fact, "F") && !(rowequ || colequ || lsame_(equed, "N"))) *info = -9; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -10; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -11; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -15; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != DN || B->Dtype != D_ || B->Mtype != GE ) *info = -16; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != DN || X->Dtype != D_ || X->Mtype != GE ) *info = -17; } } if (*info != 0) { i = -(*info); xerbla_("dgssvx", &i); return; } /* Default values for factor_params */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; drop_tol = 0.0; if ( factor_params != NULL ) { if ( factor_params->panel_size != -1 ) panel_size = factor_params->panel_size; if ( factor_params->relax != -1 ) relax = factor_params->relax; if ( factor_params->diag_pivot_thresh != -1 ) diag_pivot_thresh = factor_params->diag_pivot_thresh; if ( factor_params->drop_tol != -1 ) drop_tol = factor_params->drop_tol; } StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to NC format when necessary. */ if ( A->Stype == NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ *trant = 'T'; notran = 0; } else { *trant = 'N'; notran = 1; } } else { /* A->Stype == NC */ *trant = *trans; AA = A; } if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } if ( nofact || equil ) { t0 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, work, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_r, perm_c, X, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); dgsrfs(trant, AA, L, U, perm_r, perm_c, equed, R, C, B, X, ferr, berr, info); utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if ( *rcond < dlamch_("E") ) *info = A->ncol + 1; dQuerySpace(L, U, panel_size, mem_usage); if ( nofact || equil ) Destroy_CompCol_Permuted(&AC); if ( A->Stype == NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* PrintStat( &SuperLUStat ); */ StatFree(); }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; doublecomplex *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; /* Read the matrix in Harwell-Boeing format. */ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); zgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ zinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } else { printf("zgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_S3 solves a sparse system read from a file using SGSSVX. Discussion: The sparse matrix is stored in a file using the Harwell-Boeing sparse matrix format. The file should be assigned to the standard input of this program. For instance, if the matrix is stored in the file "g10_rua.txt", the execution command might be: super_lu_s3 < g10_rua.txt Modified: 25 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. Local parameters: SuperMatrix L, the computed L factor. int *perm_c, the column permutation vector. int *perm_r, the row permutations from partial pivoting. SuperMatrix U, the computed U factor. */ { SuperMatrix A; NCformat *Astore; float *a; int *asub; SuperMatrix B; float *berr; float *C; char equed[1]; yes_no_t equil; int *etree; float *ferr; int i; int info; SuperMatrix L; int ldx; SCformat *Lstore; int lwork; int m; mem_usage_t mem_usage; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; float *R; float rcond; float *rhsb; float *rhsx; float rpg; float *sol; SuperLUStat_t stat; trans_t trans; SuperMatrix U; float u; NCformat *Ustore; void *work; SuperMatrix X; int *xa; float *xact; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Read a sparse matrix A from standard input,\n"); printf ( " stored in Harwell-Boeing Sparse Matrix format.\n" ); printf ( "\n" ); printf ( " Solve a linear system A * X = B using SGSSVX.\n" ); /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options ( &options ); /* Can use command line input to modify the defaults. */ parse_command_line ( argc, argv, &lwork, &u, &equil, &trans ); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; printf ( "\n" ); printf ( " Length of work array LWORK = %d\n", lwork ); printf ( " Equilibration option EQUIL = %d\n", equil ); printf ( " Diagonal pivot threshhold value U = %f\n", u ); printf ( " Tranpose option TRANS = %d\n", trans ); /* Add more functionalities that the defaults. Compute reciprocal pivot growth */ options.PivotGrowth = YES; /* Compute reciprocal condition number */ options.ConditionNumber = YES; /* Perform single-precision refinement */ options.IterRefine = SINGLE; if ( 0 < lwork ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT ( "SUPERLU_MALLOC cannot allocate work[]" ); } } /* Read matrix A from a file in Harwell-Boeing format. */ sreadhb ( &m, &n, &nnz, &a, &asub, &xa ); /* Create storage for a compressed column matrix. */ sCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE ); Astore = A.Store; printf ( "\n" ); printf ( " Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz ); rhsb = floatMalloc ( m * nrhs ); if ( !rhsb ) { ABORT ( "Malloc fails for rhsb[]." ); } rhsx = floatMalloc ( m * nrhs ); if ( !rhsx ) { ABORT ( "Malloc fails for rhsx[]." ); } sCreate_Dense_Matrix ( &B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE ); sCreate_Dense_Matrix ( &X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE ); xact = floatMalloc ( n * nrhs ); if ( !xact ) { ABORT ( "SUPERLU_MALLOC cannot allocate xact[]" ); } ldx = n; sGenXtrue ( n, nrhs, xact, ldx ); sFillRHS ( trans, nrhs, xact, ldx, &A, &B ); etree = intMalloc ( n ); if ( !etree ) { ABORT ( "Malloc fails for etree[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } R = (float *) SUPERLU_MALLOC ( A.nrow * sizeof(float) ); if ( !R ) { ABORT ( "SUPERLU_MALLOC fails for R[]." ); } C = (float *) SUPERLU_MALLOC ( A.ncol * sizeof(float) ); if ( !C ) { ABORT ( "SUPERLU_MALLOC fails for C[]." ); } ferr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !ferr ) { ABORT ( "SUPERLU_MALLOC fails for ferr[]." ); } berr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !berr ) { ABORT ( "SUPERLU_MALLOC fails for berr[]." ); } /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using SGSSVX. */ sgssvx ( &options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info ); printf ( "\n" ); printf ( " SGSSVX returns INFO = %d\n", info ); if ( info == 0 || info == n+1 ) { sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) { printf ( "\n" ); printf ( " Reciprocal pivot growth = %e\n", rpg); } if ( options.ConditionNumber == YES ) { printf ( "\n" ); printf ( " Reciprocal condition number = %e\n", rcond); } if ( options.IterRefine != NOREFINE ) { printf ( "\n" ); printf ( " Iterative Refinement:\n"); printf ( "%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for ( i = 0; i < nrhs; i++ ) { printf ( "%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf ( "\n" ); printf ( " Number of nonzeros in factor L = %d\n", Lstore->nnz ); printf ( " Number of nonzeros in factor U = %d\n", Ustore->nnz ); printf ( " Number of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n ); printf ( "\n" ); printf ( " L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions ); fflush ( stdout ); } else if ( info > 0 && lwork == -1 ) { printf ( "\n" ); printf ( " Estimated memory: %d bytes\n", info - n ); } if ( options.PrintStat ) { StatPrint ( &stat ); } StatFree ( &stat ); SUPERLU_FREE ( rhsb ); SUPERLU_FREE ( rhsx ); SUPERLU_FREE ( xact ); SUPERLU_FREE ( etree ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); SUPERLU_FREE ( R ); SUPERLU_FREE ( C ); SUPERLU_FREE ( ferr ); SUPERLU_FREE ( berr ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperMatrix_Store ( &X ); if ( 0 <= lwork ) { Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); } /* Say goodbye. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Normal end of execution.\n"); return 0; }
main(int argc, char *argv[]) { /* * Purpose * ======= * * SDRIVE is the main test program for the FLOAT linear * equation driver routines SGSSV and SGSSVX. * * The program is invoked by a shell script file -- stest.csh. * The output from the tests are written into a file -- stest.out. * * ===================================================================== */ float *a, *a_save; int *asub, *asub_save; int *xa, *xa_save; SuperMatrix A, B, X, L, U; SuperMatrix ASAV, AC; GlobalLU_t Glu; /* Not needed on return. */ mem_usage_t mem_usage; int *perm_r; /* row permutation from partial pivoting */ int *perm_c, *pc_save; /* column permutation */ int *etree; float zero = 0.0; float *R, *C; float *ferr, *berr; float *rwork; float *wwork; void *work; int info, lwork, nrhs, panel_size, relax; int m, n, nnz; float *xact; float *rhsb, *solx, *bsav; int ldb, ldx; float rpg, rcond; int i, j, k1; float rowcnd, colcnd, amax; int maxsuper, rowblk, colblk; int prefact, nofact, equil, iequed; int nt, nrun, nfail, nerrs, imat, fimat, nimat; int nfact, ifact, itran; int kl, ku, mode, lda; int zerot, izero, ioff; double u; float anorm, cndnum; float *Afull; float result[NTESTS]; superlu_options_t options; fact_t fact; trans_t trans; SuperLUStat_t stat; static char matrix_type[8]; static char equed[1], path[4], sym[1], dist[1]; FILE *fp; /* Fixed set of parameters */ int iseed[] = {1988, 1989, 1990, 1991}; static char equeds[] = {'N', 'R', 'C', 'B'}; static fact_t facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm}; static trans_t transs[] = {NOTRANS, TRANS, CONJ}; /* Some function prototypes */ extern int sgst01(int, int, SuperMatrix *, SuperMatrix *, SuperMatrix *, int *, int *, float *); extern int sgst02(trans_t, int, int, int, SuperMatrix *, float *, int, float *, int, float *resid); extern int sgst04(int, int, float *, int, float *, int, float rcond, float *resid); extern int sgst07(trans_t, int, int, SuperMatrix *, float *, int, float *, int, float *, int, float *, float *, float *); extern int slatb4_slu(char *, int *, int *, int *, char *, int *, int *, float *, int *, float *, char *); extern int slatms_slu(int *, int *, char *, int *, char *, float *d, int *, float *, float *, int *, int *, char *, float *, int *, float *, int *); extern int sp_sconvert(int, int, float *, int, int, int, float *a, int *, int *, int *); /* Executable statements */ strcpy(path, "SGE"); nrun = 0; nfail = 0; nerrs = 0; /* Defaults */ lwork = 0; n = 1; nrhs = 1; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; strcpy(matrix_type, "LA"); parse_command_line(argc, argv, matrix_type, &n, &panel_size, &relax, &nrhs, &maxsuper, &rowblk, &colblk, &lwork, &u, &fp); if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { fprintf(stderr, "expert: cannot allocate %d bytes\n", lwork); exit (-1); } } /* Set the default input options. */ set_default_options(&options); options.DiagPivotThresh = u; options.PrintStat = NO; options.PivotGrowth = YES; options.ConditionNumber = YES; options.IterRefine = SLU_SINGLE; if ( strcmp(matrix_type, "LA") == 0 ) { /* Test LAPACK matrix suite. */ m = n; lda = SUPERLU_MAX(n, 1); nnz = n * n; /* upper bound */ fimat = 1; nimat = NTYPES; Afull = floatCalloc(lda * n); sallocateA(n, nnz, &a, &asub, &xa); } else { /* Read a sparse matrix */ fimat = nimat = 0; sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); } sallocateA(n, nnz, &a_save, &asub_save, &xa_save); rhsb = floatMalloc(m * nrhs); bsav = floatMalloc(m * nrhs); solx = floatMalloc(n * nrhs); ldb = m; ldx = n; sCreate_Dense_Matrix(&B, m, nrhs, rhsb, ldb, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, n, nrhs, solx, ldx, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); etree = intMalloc(n); perm_r = intMalloc(n); perm_c = intMalloc(n); pc_save = intMalloc(n); R = (float *) SUPERLU_MALLOC(m*sizeof(float)); C = (float *) SUPERLU_MALLOC(n*sizeof(float)); ferr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); berr = (float *) SUPERLU_MALLOC(nrhs*sizeof(float)); j = SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs); rwork = (float *) SUPERLU_MALLOC(j*sizeof(float)); for (i = 0; i < j; ++i) rwork[i] = 0.; if ( !R ) ABORT("SUPERLU_MALLOC fails for R"); if ( !C ) ABORT("SUPERLU_MALLOC fails for C"); if ( !ferr ) ABORT("SUPERLU_MALLOC fails for ferr"); if ( !berr ) ABORT("SUPERLU_MALLOC fails for berr"); if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork"); wwork = floatCalloc( SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs) ); for (i = 0; i < n; ++i) perm_c[i] = pc_save[i] = i; options.ColPerm = MY_PERMC; for (imat = fimat; imat <= nimat; ++imat) { /* All matrix types */ if ( imat ) { /* Skip types 5, 6, or 7 if the matrix size is too small. */ zerot = (imat >= 5 && imat <= 7); if ( zerot && n < imat-4 ) continue; /* Set up parameters with SLATB4 and generate a test matrix with SLATMS. */ slatb4_slu(path, &imat, &n, &n, sym, &kl, &ku, &anorm, &mode, &cndnum, dist); slatms_slu(&n, &n, dist, iseed, sym, &rwork[0], &mode, &cndnum, &anorm, &kl, &ku, "No packing", Afull, &lda, &wwork[0], &info); if ( info ) { printf(FMT3, "SLATMS", info, izero, n, nrhs, imat, nfail); continue; } /* For types 5-7, zero one or more columns of the matrix to test that INFO is returned correctly. */ if ( zerot ) { if ( imat == 5 ) izero = 1; else if ( imat == 6 ) izero = n; else izero = n / 2 + 1; ioff = (izero - 1) * lda; if ( imat < 7 ) { for (i = 0; i < n; ++i) Afull[ioff + i] = zero; } else { for (j = 0; j < n - izero + 1; ++j) for (i = 0; i < n; ++i) Afull[ioff + i + j*lda] = zero; } } else { izero = 0; } /* Convert to sparse representation. */ sp_sconvert(n, n, Afull, lda, kl, ku, a, asub, xa, &nnz); } else { izero = 0; zerot = 0; } sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); /* Save a copy of matrix A in ASAV */ sCreate_CompCol_Matrix(&ASAV, m, n, nnz, a_save, asub_save, xa_save, SLU_NC, SLU_S, SLU_GE); sCopy_CompCol_Matrix(&A, &ASAV); /* Form exact solution. */ sGenXtrue(n, nrhs, xact, ldx); StatInit(&stat); for (iequed = 0; iequed < 4; ++iequed) { *equed = equeds[iequed]; if (iequed == 0) nfact = 4; else nfact = 1; /* Only test factored, pre-equilibrated matrix */ for (ifact = 0; ifact < nfact; ++ifact) { fact = facts[ifact]; options.Fact = fact; for (equil = 0; equil < 2; ++equil) { options.Equil = equil; prefact = ( options.Fact == FACTORED || options.Fact == SamePattern_SameRowPerm ); /* Need a first factor */ nofact = (options.Fact != FACTORED); /* Not factored */ /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); if ( zerot ) { if ( prefact ) continue; } else if ( options.Fact == FACTORED ) { if ( equil || iequed ) { /* Compute row and column scale factors to equilibrate matrix A. */ sgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &info); /* Force equilibration. */ if ( !info && n > 0 ) { if ( strncmp(equed, "R", 1)==0 ) { rowcnd = 0.; colcnd = 1.; } else if ( strncmp(equed, "C", 1)==0 ) { rowcnd = 1.; colcnd = 0.; } else if ( strncmp(equed, "B", 1)==0 ) { rowcnd = 0.; colcnd = 0.; } } /* Equilibrate the matrix. */ slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } } if ( prefact ) { /* Need a factor for the first time */ /* Save Fact option. */ fact = options.Fact; options.Fact = DOFACT; /* Preorder the matrix, obtain the column etree. */ sp_preorder(&options, &A, perm_c, etree, &AC); /* Factor the matrix AC. */ sgstrf(&options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, &L, &U, &Glu, &stat, &info); if ( info ) { printf("** First factor: info %d, equed %c\n", info, *equed); if ( lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); exit(0); } } Destroy_CompCol_Permuted(&AC); /* Restore Fact option. */ options.Fact = fact; } /* if .. first time factor */ for (itran = 0; itran < NTRAN; ++itran) { trans = transs[itran]; options.Trans = trans; /* Restore the matrix A. */ sCopy_CompCol_Matrix(&ASAV, &A); /* Set the right hand side. */ sFillRHS(trans, nrhs, xact, ldx, &A, &B); sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, bsav, ldb); /*---------------- * Test sgssv *----------------*/ if ( options.Fact == DOFACT && itran == 0) { /* Not yet factored, and untransposed */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, solx, ldx); sgssv(&options, &A, perm_c, perm_r, &L, &U, &X, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssv", info, izero, n, nrhs, imat, nfail); } else { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); nt = 1; if ( izero == 0 ) { /* Compute residual of the computed solution. */ sCopy_Dense_Matrix(m, nrhs, rhsb, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &A, solx, ldx, wwork,ldb, &result[1]); nt = 2; } /* Print information about the tests that did not pass the threshold. */ for (i = 0; i < nt; ++i) { if ( result[i] >= THRESH ) { printf(FMT1, "sgssv", n, i, result[i]); ++nfail; } } nrun += nt; } /* else .. info == 0 */ /* Restore perm_c. */ for (i = 0; i < n; ++i) perm_c[i] = pc_save[i]; if (lwork == 0) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* if .. end of testing sgssv */ /*---------------- * Test sgssvx *----------------*/ /* Equilibrate the matrix if fact = FACTORED and equed = 'R', 'C', or 'B'. */ if ( options.Fact == FACTORED && (equil || iequed) && n > 0 ) { slaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } /* Solve the system and compute the condition number and error bounds using sgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if ( info && info != izero ) { printf(FMT3, "sgssvx", info, izero, n, nrhs, imat, nfail); if ( lwork == -1 ) { printf("** Estimated memory: %.0f bytes\n", mem_usage.total_needed); exit(0); } } else { if ( !prefact ) { /* Reconstruct matrix from factors and compute residual. */ sgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); k1 = 0; } else { k1 = 1; } if ( !info ) { /* Compute residual of the computed solution.*/ sCopy_Dense_Matrix(m, nrhs, bsav, ldb, wwork, ldb); sgst02(trans, m, n, nrhs, &ASAV, solx, ldx, wwork, ldb, &result[1]); /* Check solution from generated exact solution. */ sgst04(n, nrhs, solx, ldx, xact, ldx, rcond, &result[2]); /* Check the error bounds from iterative refinement. */ sgst07(trans, n, nrhs, &ASAV, bsav, ldb, solx, ldx, xact, ldx, ferr, berr, &result[3]); /* Print information about the tests that did not pass the threshold. */ for (i = k1; i < NTESTS; ++i) { if ( result[i] >= THRESH ) { printf(FMT2, "sgssvx", options.Fact, trans, *equed, n, imat, i, result[i]); ++nfail; } } nrun += NTESTS; } /* if .. info == 0 */ } /* else .. end of testing sgssvx */ } /* for itran ... */ if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* for equil ... */ } /* for ifact ... */ } /* for iequed ... */ #if 0 if ( !info ) { PrintPerf(&L, &U, &mem_usage, rpg, rcond, ferr, berr, equed); } #endif Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&ASAV); StatFree(&stat); } /* for imat ... */ /* Print a summary of the results. */ PrintSumm("SGE", nfail, nrun, nerrs); if ( strcmp(matrix_type, "LA") == 0 ) SUPERLU_FREE (Afull); SUPERLU_FREE (rhsb); SUPERLU_FREE (bsav); SUPERLU_FREE (solx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (pc_save); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); SUPERLU_FREE (rwork); SUPERLU_FREE (wwork); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); #if 0 Destroy_CompCol_Matrix(&A); Destroy_CompCol_Matrix(&ASAV); #else SUPERLU_FREE(a); SUPERLU_FREE(asub); SUPERLU_FREE(xa); SUPERLU_FREE(a_save); SUPERLU_FREE(asub_save); SUPERLU_FREE(xa_save); #endif if ( lwork > 0 ) { SUPERLU_FREE (work); Destroy_SuperMatrix_Store(&L); Destroy_SuperMatrix_Store(&U); } return 0; }
main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program ZLINSOLX1. * * This example illustrates how to use ZGSSVX to solve systems with the same * A but different right-hand side. * In this case, we factorize A only once in the first call to DGSSVX, * and reuse the following data structures in the subsequent call to ZGSSVX: * perm_c, perm_r, R, C, L, U. * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; doublecomplex *rhsb, *rhsx, *xact; double *R, *C; double *ferr, *berr; double u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default values for options argument: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("ZLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE); zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("LU factorization: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM USING THE FACTORED FORM OF A. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ B.ncol = nrhs; /* Set the number of right-hand side */ /* Initialize the statistics variables. */ StatInit(&stat); zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("Triangular solve: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
bool SuperLUSolver::Solve(SparseMatrixType& rA, VectorType& rX, VectorType& rB) { //std::cout << "matrix size in solver: " << rA.size1() << std::endl; //std::cout << "RHS size in solver SLU: " << rB.size() << std::endl; // typedef ublas::compressed_matrix<double, ublas::row_major, 0, // ublas::unbounded_array<int>, ublas::unbounded_array<double> > cm_t; //make a copy of the RHS VectorType rC = rB; superlu_options_t options; SuperLUStat_t stat; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); options.IterRefine = SLU_DOUBLE; // options.ColPerm = MMD_AT_PLUS_A; //Fill the SuperLU matrices SuperMatrix Aslu, B, L, U; //create a copy of the matrix int *index1_vector = new (std::nothrow) int[rA.index1_data().size()]; int *index2_vector = new (std::nothrow) int[rA.index2_data().size()]; // double *values_vector = new (std::nothrow) double[rA.value_data().size()]; for( int unsigned i = 0; i < rA.index1_data().size(); i++ ) index1_vector[i] = (int)rA.index1_data()[i]; for( unsigned int i = 0; i < rA.index2_data().size(); i++ ) index2_vector[i] = (int)rA.index2_data()[i]; /* for( unsigned int i = 0; i < rA.value_data().size(); i++ ) values_vector[i] = (double)rA.value_data()[i];*/ //create a copy of the rhs vector (it will be overwritten with the solution) /* double *b_vector = new (std::nothrow) double[rB.size()]; for( unsigned int i = 0; i < rB.size(); i++ ) b_vector[i] = rB[i];*/ /* dCreate_CompCol_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), values_vector, index2_vector, index1_vector, SLU_NR, SLU_D, SLU_GE );*/ //works also with dCreate_CompCol_Matrix dCreate_CompRow_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), rA.value_data().begin(), index2_vector, //can not avoid a copy as ublas uses unsigned int internally index1_vector, //can not avoid a copy as ublas uses unsigned int internally SLU_NR, SLU_D, SLU_GE ); dCreate_Dense_Matrix (&B, rB.size(), 1,&rB[0],rB.size(),SLU_DN, SLU_D, SLU_GE); //allocate memory for permutation arrays int* perm_c; int* perm_r; if ( !(perm_c = intMalloc(rA.size1())) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(rA.size2())) ) ABORT("Malloc fails for perm_r[]."); //initialize container for statistical data StatInit(&stat); //call solver routine int info; dgssv(&options, &Aslu, perm_c, perm_r, &L, &U, &B, &stat, &info); //print output if (options.PrintStat) { StatPrint(&stat); } //resubstitution of results #pragma omp parallel for for(int i=0; i<static_cast<int>(rB.size()); i++ ) rX[i] = rB[i]; // B(i,0); //recover the RHS rB=rC; //deallocate memory used StatFree(&stat); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_SuperMatrix_Store(&Aslu); //note that by using the "store" function we will take care of deallocation ourselves Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); delete [] index1_vector; delete [] index2_vector; // delete [] b_vector; //CHECK WITH VALGRIND IF THIS IS NEEDED ...or if it is done by the lines above //deallocate tempory storage used for the matrix // if(b_vector!=NULL) delete [] index1_vector; // // if(b_vector!=NULL) delete [] index2_vector; // if(b_vector!=NULL) delete [] values_vector; // if(b_vector!=NULL) delete [] b_vector; return true; }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; double *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; #if 1 /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); dreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); dreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); dreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } #else /* Read the matrix in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); #endif dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); dgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ dinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("dgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void pzgssv(int nprocs, SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * PZGSSV solves the system of linear equations A*X=B, using the parallel * LU factorization routine PZGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc is a * permutation matrix. * For more details of this step, see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. Permute columns of tranpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of "SuperMatrix" structure. * * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by pzgstrf(). There is a single thread of * control to call pzgstrf(), and all threads spawned by pzgstrf() * are terminated before returning from pzgstrf(). * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR; Dtype = _D; Mtype = GE. In the future, * more general A will be handled. * * perm_c (input/output) int* * If A->Stype=NC, column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype=NR, column permutation vector of size A->nrow * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (output) int*, * If A->Stype=NR, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype=NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Use column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ trans_t trans; NCformat *Astore; DNformat *Bstore; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int i, n, panel_size, relax; fact_t fact; yes_no_t refact, usepr; double diag_pivot_thresh, drop_tol; void *work; int lwork; superlumt_options_t superlumt_options; Gstat_t Gstat; double t; /* Temporary time */ double *utime; flops_t *ops, flopcnt; /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ Astore = A->Store; Bstore = B->Store; *info = 0; if ( nprocs <= 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_Z || A->Mtype != SLU_GE ) *info = -2; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(1, A->nrow) )*info = -7; if ( *info != 0 ) { i = -(*info); xerbla_("pzgssv", &i); return; } #if 0 /* Use the best sequential code. if this part is commented out, we will use the parallel code run on one processor. */ if ( nprocs == 1 ) { return; } #endif fact = EQUILIBRATE; refact = NO; trans = NOTRANS; panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; usepr = NO; drop_tol = 0.0; work = NULL; lwork = 0; /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ n = A->ncol; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); trans = TRANS; } else if ( A->Stype == SLU_NC ) AA = A; /* ------------------------------------------------------------ Initialize the option structure superlumt_options using the user-input parameters; Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ pzgstrf_init(nprocs, fact, trans, refact, panel_size, relax, diag_pivot_thresh, usepr, drop_tol, perm_c, perm_r, work, lwork, AA, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pzgstrf(&superlumt_options, &AC, perm_r, L, U, &Gstat, info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; #if ( PRNTlevel==1 ) printf("nprocs = %d, flops %e, Mflops %.2f\n", nprocs, flopcnt, flopcnt/utime[FACT]*1e-6); printf("Parameters: w %d, relax %d, maxsuper %d, rowblk %d, colblk %d\n", sp_ienv(1), sp_ienv(2), sp_ienv(3), sp_ienv(4), sp_ienv(5)); fflush(stdout); #endif /* ------------------------------------------------------------ Solve the system A*X=B, overwriting B with X. ------------------------------------------------------------*/ if ( *info == 0 ) { t = SuperLU_timer_(); zgstrs (trans, L, U, perm_r, perm_c, B, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t; ops[SOLVE] = ops[TRISOLVE]; } /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ pxgstrf_finalize(&superlumt_options, &AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif PrintStat(&Gstat); StatFree(&Gstat); }
static PyObject *Py_sgssv (PyObject *self, PyObject *args, PyObject *kwdict) { PyObject *Py_B=NULL, *Py_X=NULL; PyArrayObject *nzvals=NULL; PyArrayObject *colind=NULL, *rowptr=NULL; int N, nnz; int info; int csc=0, permc_spec=2; int *perm_r=NULL, *perm_c=NULL; SuperMatrix A, B, L, U; superlu_options_t options; SuperLUStat_t stat; static char *kwlist[] = {"N","nnz","nzvals","colind","rowptr","B", "csc", "permc_spec",NULL}; /* Get input arguments */ if (!PyArg_ParseTupleAndKeywords(args, kwdict, "iiO!O!O!O|ii", kwlist, &N, &nnz, &PyArray_Type, &nzvals, &PyArray_Type, &colind, &PyArray_Type, &rowptr, &Py_B, &csc, &permc_spec)) return NULL; if (!_CHECK_INTEGER(colind) || !_CHECK_INTEGER(rowptr)) { PyErr_SetString(PyExc_TypeError, "colind and rowptr must be of type cint"); return NULL; } /* Create Space for output */ Py_X = PyArray_CopyFromObject(Py_B,PyArray_FLOAT,1,2); if (Py_X == NULL) return NULL; if (csc) { if (NCFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { Py_DECREF(Py_X); return NULL; } } else { if (NRFormat_from_spMatrix(&A, N, N, nnz, nzvals, colind, rowptr, PyArray_FLOAT)) { Py_DECREF(Py_X); return NULL; } } if (DenseSuper_from_Numeric(&B, Py_X)) { Destroy_SuperMatrix_Store(&A); Py_DECREF(Py_X); return NULL; } /* B and Py_X share same data now but Py_X "owns" it */ /* Setup options */ if (setjmp(_superlu_py_jmpbuf)) goto fail; else { perm_c = intMalloc(N); perm_r = intMalloc(N); set_default_options(&options); options.ColPerm=superlu_module_getpermc(permc_spec); StatInit(&stat); /* Compute direct inverse of sparse Matrix */ sgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); } SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); return Py_BuildValue("Ni", Py_X, info); fail: SUPERLU_FREE(perm_r); SUPERLU_FREE(perm_c); Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); StatFree(&stat); Py_XDECREF(Py_X); return NULL; }
int main(int argc, char *argv[]) { void smatvec_mult(float alpha, float x[], float beta, float y[]); void spsolve(int n, float x[], float y[]); extern int sfgmr( int n, void (*matvec_mult)(float, float [], float, float []), void (*psolve)(int n, float [], float[]), float *rhs, float *sol, double tol, int restrt, int *itmax, FILE *fits); extern int sfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; GlobalLU_t Glu; /* facilitate multiple factorizations with SamePattern_SameRowPerm */ float *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; float *rhsb, *rhsx, *xact; float *work = NULL; float *R, *C; float u, rpg, rcond; float zero = 0.0; float one = 1.0; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; FILE *fp = stdin; int restrt, iter, maxit, i; double resid; float *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SILU; */ ilu_set_default_options(&options); /* Modify the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); sreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); sreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); Astore = A.Store; sfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); /* Generate the right-hand side */ if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); ldx = n; sGenXtrue(n, nrhs, xact, ldx); sFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ B.ncol = 0; /* not to perform triangular solution */ sgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &Glu, &mem_usage, &stat, &info); /* Set RHS for GMRES. */ if (!(b = floatMalloc(m))) ABORT("Malloc fails for b[]."); if (*equed == 'R' || *equed == 'B') { for (i = 0; i < n; ++i) b[i] = rhsb[i] * R[i]; } else { for (i = 0; i < m; i++) b[i] = rhsb[i]; } printf("sgsisx(): info %d, equed %c\n", info, equed[0]); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; GLOBAL_OPTIONS = &options; GLOBAL_R = R; GLOBAL_C = C; GLOBAL_MEM_USAGE = &mem_usage; /* Set the options to do solve-only. */ options.Fact = FACTORED; options.PivotGrowth = NO; options.ConditionNumber = NO; /* Set the variables used by GMRES. */ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(x = floatMalloc(n))) ABORT("Malloc fails for x[]."); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; float temp; extern float snrm2_(int *, float [], int *); extern void saxpy_(int *, float *, float [], int *, float [], int *); /* Initial guess */ for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); /* Call GMRES */ sfgmr(n, smatvec_mult, spsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = snrm2_(&(Astore->nnz), (float *)((DNformat *)A.Store)->nzval, &i_1); nrmB = snrm2_(&m, b, &i_1); sp_sgemv("N", -1.0, &A, x, 1, 1.0, b, 1); res = snrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); /* Scale the solution back if equilibration was performed. */ if (*equed == 'C' || *equed == 'B') for (i = 0; i < n; i++) x[i] *= C[i]; for (i = 0; i < m; i++) { maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i])); } printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 0; }
int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector b, HYPRE_ParVector x ) { #ifdef HAVE_SUPERLU int startRow, endRow, nrows, *partition, *AdiagI, *AdiagJ, nnz; int irow, colNum, index, *cscI, *cscJ, jcol, *colLengs; int *etree, permcSpec, lwork, panelSize, relax, info; double *AdiagA, *cscA, diagPivotThresh, dropTol; char refact[1]; hypre_CSRMatrix *Adiag; HYPRE_SuperLU *sluPtr; SuperMatrix sluAmat, auxAmat; superlu_options_t slu_options; SuperLUStat_t slu_stat; /* ---------------------------------------------------------------- */ /* get matrix information */ /* ---------------------------------------------------------------- */ sluPtr = (HYPRE_SuperLU *) solver; assert ( sluPtr != NULL ); HYPRE_ParCSRMatrixGetRowPartitioning( A_csr, &partition ); startRow = partition[0]; endRow = partition[1] - 1; nrows = endRow - startRow + 1; free( partition ); if ( startRow != 0 ) { printf("HYPRE_ParCSR_SuperLUSetup ERROR - start row != 0.\n"); return -1; } /* ---------------------------------------------------------------- */ /* get hypre matrix */ /* ---------------------------------------------------------------- */ Adiag = hypre_ParCSRMatrixDiag((hypre_ParCSRMatrix *) A_csr); AdiagI = hypre_CSRMatrixI(Adiag); AdiagJ = hypre_CSRMatrixJ(Adiag); AdiagA = hypre_CSRMatrixData(Adiag); nnz = AdiagI[nrows]; /* ---------------------------------------------------------------- */ /* convert the csr matrix into csc matrix */ /* ---------------------------------------------------------------- */ colLengs = (int *) malloc(nrows * sizeof(int)); for ( irow = 0; irow < nrows; irow++ ) colLengs[irow] = 0; for ( irow = 0; irow < nrows; irow++ ) for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) colLengs[AdiagJ[jcol]]++; cscJ = (int *) malloc( (nrows+1) * sizeof(int) ); cscI = (int *) malloc( nnz * sizeof(int) ); cscA = (double *) malloc( nnz * sizeof(double) ); cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } for ( irow = 0; irow < nrows; irow++ ) { for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) { colNum = AdiagJ[jcol]; index = cscJ[colNum]++; cscI[index] = irow; cscA[index] = AdiagA[jcol]; } } cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } free(colLengs); /* ---------------------------------------------------------------- */ /* create SuperMatrix */ /* ---------------------------------------------------------------- */ dCreate_CompCol_Matrix(&sluAmat,nrows,nrows,cscJ[nrows],cscA,cscI, cscJ, SLU_NC, SLU_D, SLU_GE); etree = (int *) malloc(nrows * sizeof(int)); sluPtr->permC_ = (int *) malloc(nrows * sizeof(int)); sluPtr->permR_ = (int *) malloc(nrows * sizeof(int)); permcSpec = 0; get_perm_c(permcSpec, &sluAmat, sluPtr->permC_); slu_options.Fact = DOFACT; slu_options.SymmetricMode = NO; sp_preorder(&slu_options, &sluAmat, sluPtr->permC_, etree, &auxAmat); diagPivotThresh = 1.0; dropTol = 0.0; panelSize = sp_ienv(1); relax = sp_ienv(2); StatInit(&slu_stat); lwork = 0; slu_options.ColPerm = MY_PERMC; slu_options.DiagPivotThresh = diagPivotThresh; dgstrf(&slu_options, &auxAmat, dropTol, relax, panelSize, etree, NULL, lwork, sluPtr->permC_, sluPtr->permR_, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), &slu_stat, &info); Destroy_CompCol_Permuted(&auxAmat); Destroy_CompCol_Matrix(&sluAmat); free(etree); sluPtr->factorized_ = 1; StatFree(&slu_stat); return 0; #else printf("HYPRE_ParCSR_SuperLUSetup ERROR - SuperLU not enabled.\n"); *solver = (HYPRE_Solver) NULL; return -1; #endif }