void tlin::createS(SuperMatrix &A, int rows, int cols, int nnz) { double *values = doubleMalloc(nnz); int *rowind = intMalloc(nnz); int *colptr = intMalloc(cols + 1); dCreate_CompCol_Matrix(&A, rows, cols, nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); }
void tlin::allocS(SuperMatrix *&A, int rows, int cols, int nnz) { A = (SuperMatrix *)SUPERLU_MALLOC(sizeof(SuperMatrix)); double *values = doubleMalloc(nnz); int *rowind = intMalloc(nnz); int *colptr = intMalloc(cols + 1); dCreate_CompCol_Matrix(A, rows, cols, nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); }
Error_t floorplan_matrix_build ( FloorplanMatrix_t *flpmatrix, CellIndex_t nrows, CellIndex_t ncolumns, CellIndex_t nnz ) { flpmatrix->NRows = nrows ; flpmatrix->NColumns = ncolumns ; flpmatrix->NNz = nnz ; flpmatrix->RowIndices = (CellIndex_t *) malloc (sizeof(CellIndex_t) * nnz) ; if (flpmatrix->RowIndices == NULL) return TDICE_FAILURE ; flpmatrix->ColumnPointers = (CellIndex_t *) malloc (sizeof(CellIndex_t) * (ncolumns + 1)) ; if (flpmatrix->ColumnPointers == NULL) { free (flpmatrix->RowIndices) ; return TDICE_FAILURE ; } flpmatrix->Values = (Source_t *) malloc (sizeof(Source_t) * nnz) ; if (flpmatrix->Values == NULL) { free (flpmatrix->ColumnPointers) ; free (flpmatrix->RowIndices) ; return TDICE_FAILURE ; } dCreate_CompCol_Matrix (&flpmatrix->SLUMatrix, flpmatrix->NRows, flpmatrix->NColumns, flpmatrix->NNz, flpmatrix->Values, (int*) flpmatrix->RowIndices, (int*) flpmatrix->ColumnPointers, SLU_NC, SLU_D, SLU_GE) ; return TDICE_SUCCESS ; }
static SuperMatrix* supermatrix_new(adj_graph_t* graph) { SuperMatrix* A = polymec_malloc(sizeof(SuperMatrix)); // Fetch sparsity information from the graph. int* edge_offsets = adj_graph_edge_offsets(graph); int num_rows = adj_graph_num_vertices(graph); int num_edges = edge_offsets[num_rows]; int num_nz = num_rows + num_edges; // Translate the sparsity information in the graph to column indices and // row pointers. Since the graph is effectively stored in compressed // row format, we will use SuperLU's compressed row matrix format. int* row_ptrs = intMalloc(num_rows + 1); int* col_indices = intMalloc(num_nz); int* edges = adj_graph_adjacency(graph); int offset = 0; for (int i = 0; i < num_rows; ++i) { row_ptrs[i] = offset; col_indices[offset++] = i; // diagonal column // Off-diagonal columns. for (int j = edge_offsets[i]; j < edge_offsets[i+1]; ++j) col_indices[offset++] = edges[j]; int_qsort(&col_indices[row_ptrs[i]+1], edge_offsets[i+1]-edge_offsets[i]); } row_ptrs[num_rows] = offset; ASSERT(offset == num_nz); // Create zeros for the matrix. real_t* mat_zeros = SUPERLU_MALLOC(sizeof(real_t) * num_nz); memset(mat_zeros, 0, sizeof(real_t) * num_nz); // Hand over these resources to create the Supermatrix. dCreate_CompCol_Matrix(A, num_rows, num_rows, num_nz, mat_zeros, col_indices, row_ptrs, SLU_NC, SLU_D, SLU_GE); return A; }
void dgssvx(char *fact, char *trans, char *refact, SuperMatrix *A, factor_param_t *factor_params, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, mem_usage_t *mem_usage, int *info ) { /* * Purpose * ======= * * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B (if trans='N') * or diag(C)*B (if trans = 'T' or 'C'). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation * matrix that usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 1.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * matrix A (after equilibration if fact = 'E') as Pr*A*Pc = L*U, * with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form of * A is used to estimate the condition number of the matrix A. If * the reciprocal of the condition number is less than machine * precision, info = A->ncol+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the transpose of A: * * 2.1. If fact = 'E', scaling factors are computed to equilibrate the * system: * trans = 'N': diag(R)*A'*diag(C) *inv(diag(C))*X = diag(R)*B * trans = 'T': (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = 'C': (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 2.3. If fact = 'N' or 'E', the LU decomposition is used to factor the * transpose(A) (after equilibration if fact = 'E') as * Pr*transpose(A)*Pc = L*U with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form * of transpose(A) is used to estimate the condition number of the * matrix A. If the reciprocal of the condition number * is less than machine precision, info = A->nrow+1 is returned as * a warning, but the routine still goes on to solve for X and * computes error bounds as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = 'N') or diag(R) (if trans = 'T' or 'C') so * that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * fact (input) char* * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = 'F': On entry, L, U, perm_r and perm_c contain the factored * form of A. If equed is not 'N', the matrix A has been * equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = 'N': The matrix A will be factored, and the factors will be * stored in L and U. * = 'E': The matrix A will be equilibrated if necessary, then * factored into L and U. * * trans (input) char* * Specifies the form of the system of equations: * = 'N': A * X = B (No transpose) * = 'T': A**T * X = B (Transpose) * = 'C': A**H * X = B (Transpose) * * refact (input) char* * Specifies whether we want to re-factor the matrix. * = 'N': Factor the matrix A. * = 'Y': Matrix A was factored before, now we want to re-factor * matrix A with perm_r and etree as inputs. Use * the same storage for the L\U factors previously allocated, * expand it if necessary. User should insure to use the same * memory model. In this case, perm_r may be modified due to * different pivoting determined by diagonal threshold. * If fact = 'F', then refact is not accessed. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of the linear equations is A->nrow. Currently, the type of A can be: * Stype = NC or NR, Dtype = D_, Mtype = GE. In the future, * more general A can be handled. * * On entry, If fact = 'F' and equed is not 'N', then A must have * been equilibrated by the scaling factors in R and/or C. * A is not modified if fact = 'F' or 'N', or if fact = 'E' and * equed = 'N' on exit. * * On exit, if fact = 'E' and equed is not 'N', A is scaled as follows: * If A->Stype = NC: * equed = 'R': A := diag(R) * A * equed = 'C': A := A * diag(C) * equed = 'B': A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = 'R': transpose(A) := diag(R) * transpose(A) * equed = 'C': transpose(A) := transpose(A) * diag(C) * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). * * factor_params (input) factor_param_t* * The structure defines the input scalar parameters, consisting of * the following fields. If factor_params = NULL, the default * values are used for all the fields; otherwise, the values * are given by the user. * - panel_size (int): Panel size. A panel consists of at most * panel_size consecutive columns. If panel_size = -1, use * default value 8. * - relax (int): To control degree of relaxing supernodes. If the * number of nodes (columns) in a subtree of the elimination * tree is less than relax, this subtree is considered as one * supernode, regardless of the row structures of those columns. * If relax = -1, use default value 8. * - diag_pivot_thresh (double): Diagonal pivoting threshold. * At step j of the Gaussian elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * then use A_jj as pivot. 0 <= diag_pivot_thresh <= 1. * If diag_pivot_thresh = -1, use default value 1.0, * which corresponds to standard partial pivoting. * - drop_tol (double): Drop tolerance threshold. (NOT IMPLEMENTED) * At step j of the Gaussian elimination, if * abs(A_ij)/(max_i abs(A_ij)) < drop_tol, * then drop entry A_ij. 0 <= drop_tol <= 1. * If drop_tol = -1, use default value 0.0, which corresponds to * standard Gaussian elimination. * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If refact is not 'Y', perm_r is output argument; * If refact = 'Y', the pivoting routine will try to use the input * perm_r, unless a certain threshold criterion is violated. * In that case, perm_r is overwritten by a new permutation * determined by partial pivoting or diagonal threshold pivoting. * * etree (input/output) int*, dimension (A->ncol) * Elimination tree of Pc'*A'*A*Pc. * If fact is not 'F' and refact = 'Y', etree is an input argument, * otherwise it is an output argument. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * equed (input/output) char* * Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If fact = 'F', equed is an input argument, otherwise it is * an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = 'R' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the left by diag(R). * If equed = 'N' or 'C', R is not accessed. * If fact = 'F', R is an input argument; otherwise, R is output. * If fact = 'F' and equed = 'R' or 'B', each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = 'C' or 'B', A (if A->Stype = NC) or transpose(A) (if * A->Stype = NR) is multiplied on the right by diag(C). * If equed = 'N' or 'R', C is not accessed. * If fact = 'F', C is an input argument; otherwise, C is output. * If fact = 'F' and equed = 'C' or 'B', each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = D_, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NC, Dtype = D_, Mtype = TRU. * * work (workspace/output) void*, size (lwork) (in bytes) * User supplied workspace, should be large enough * to hold data structures for factors L and U. * On exit, if fact is not 'F', L and U point to this array. * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * mem_usage->total_needed; no other side effects. * * See argument 'mem_usage' for memory usage statistics. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = D_, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = 'N', B is not modified; otherwise * if A->Stype = NC: * if trans = 'N' and equed = 'R' or 'B', B is overwritten by * diag(R)*B; * if trans = 'T' or 'C' and equed = 'C' of 'B', B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = 'N' and equed = 'C' or 'B', B is overwritten by * diag(C)*B; * if trans = 'T' or 'C' and equed = 'R' of 'B', B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = D_, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not 'N', and the solution to the equilibrated * system is inv(diag(C))*X if trans = 'N' and equed = 'C' or 'B', * or inv(diag(R))*X if trans = 'T' or 'C' and equed = 'R' or 'B'. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). * The infinity norm is used. If recip_pivot_growth is much less * than 1, the stability of the LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * mem_usage (output) mem_usage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ; char trant[1], norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double diag_pivot_thresh, drop_tol; double t0; /* temporary time */ double *utime; extern SuperLUStat_t SuperLUStat; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double dlamch_(char *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; #if 0 printf("dgssvx: fact=%c, trans=%c, refact=%c, equed=%c\n", *fact, *trans, *refact, *equed); #endif *info = 0; nofact = lsame_(fact, "N"); equil = lsame_(fact, "E"); notran = lsame_(trans, "N"); if (nofact || equil) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); smlnum = dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* Test the input parameters */ if (!nofact && !equil && !lsame_(fact, "F")) *info = -1; else if (!notran && !lsame_(trans, "T") && !lsame_(trans, "C")) *info = -2; else if ( !(lsame_(refact,"Y") || lsame_(refact, "N")) ) *info = -3; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != NC && A->Stype != NR) || A->Dtype != D_ || A->Mtype != GE ) *info = -4; else if (lsame_(fact, "F") && !(rowequ || colequ || lsame_(equed, "N"))) *info = -9; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -10; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -11; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -15; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != DN || B->Dtype != D_ || B->Mtype != GE ) *info = -16; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != DN || X->Dtype != D_ || X->Mtype != GE ) *info = -17; } } if (*info != 0) { i = -(*info); xerbla_("dgssvx", &i); return; } /* Default values for factor_params */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; drop_tol = 0.0; if ( factor_params != NULL ) { if ( factor_params->panel_size != -1 ) panel_size = factor_params->panel_size; if ( factor_params->relax != -1 ) relax = factor_params->relax; if ( factor_params->diag_pivot_thresh != -1 ) diag_pivot_thresh = factor_params->diag_pivot_thresh; if ( factor_params->drop_tol != -1 ) drop_tol = factor_params->drop_tol; } StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to NC format when necessary. */ if ( A->Stype == NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ *trant = 'T'; notran = 0; } else { *trant = 'N'; notran = 1; } } else { /* A->Stype == NC */ *trant = *trans; AA = A; } if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = lsame_(equed, "R") || lsame_(equed, "B"); colequ = lsame_(equed, "C") || lsame_(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } if ( nofact || equil ) { t0 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, work, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_r, perm_c, X, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); dgsrfs(trant, AA, L, U, perm_r, perm_c, equed, R, C, B, X, ferr, berr, info); utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if ( *rcond < dlamch_("E") ) *info = A->ncol + 1; dQuerySpace(L, U, panel_size, mem_usage); if ( nofact || equil ) Destroy_CompCol_Permuted(&AC); if ( A->Stype == NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* PrintStat( &SuperLUStat ); */ StatFree(); }
int main(int argc, char *argv[]) { /* * Purpose * ======= * * DDRIVE is the main test program for the DOUBLE linear * equation driver routines DGSSV and DGSSVX. * * The program is invoked by a shell script file -- dtest.csh. * The output from the tests are written into a file -- dtest.out. * * ===================================================================== */ double *a, *a_save; int *asub, *asub_save; int *xa, *xa_save; SuperMatrix A, B, X, L, U; SuperMatrix ASAV, AC; GlobalLU_t Glu; /* Not needed on return. */ mem_usage_t mem_usage; int *perm_r; /* row permutation from partial pivoting */ int *perm_c, *pc_save; /* column permutation */ int *etree; double zero = 0.0; double *R, *C; double *ferr, *berr; double *rwork; double *wwork; void *work; int info, lwork, nrhs, panel_size, relax; int m, n, nnz; double *xact; double *rhsb, *solx, *bsav; int ldb, ldx; double rpg, rcond; int i, j, k1; double rowcnd, colcnd, amax; int maxsuper, rowblk, colblk; int prefact, nofact, equil, iequed; int nt, nrun, nfail, nerrs, imat, fimat, nimat; int nfact, ifact, itran; int kl, ku, mode, lda; int zerot, izero, ioff; double u; double anorm, cndnum; double *Afull; double result[NTESTS]; superlu_options_t options; fact_t fact; trans_t trans; SuperLUStat_t stat; static char matrix_type[8]; static char equed[1], path[4], sym[1], dist[1]; FILE *fp; /* Fixed set of parameters */ int iseed[] = {1988, 1989, 1990, 1991}; static char equeds[] = {'N', 'R', 'C', 'B'}; static fact_t facts[] = {FACTORED, DOFACT, SamePattern, SamePattern_SameRowPerm}; static trans_t transs[] = {NOTRANS, TRANS, CONJ}; /* Some function prototypes */ extern int dgst01(int, int, SuperMatrix *, SuperMatrix *, SuperMatrix *, int *, int *, double *); extern int dgst02(trans_t, int, int, int, SuperMatrix *, double *, int, double *, int, double *resid); extern int dgst04(int, int, double *, int, double *, int, double rcond, double *resid); extern int dgst07(trans_t, int, int, SuperMatrix *, double *, int, double *, int, double *, int, double *, double *, double *); extern int dlatb4_slu(char *, int *, int *, int *, char *, int *, int *, double *, int *, double *, char *); extern int dlatms_slu(int *, int *, char *, int *, char *, double *d, int *, double *, double *, int *, int *, char *, double *, int *, double *, int *); extern int sp_dconvert(int, int, double *, int, int, int, double *a, int *, int *, int *); /* Executable statements */ strcpy(path, "DGE"); nrun = 0; nfail = 0; nerrs = 0; /* Defaults */ lwork = 0; n = 1; nrhs = 1; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; strcpy(matrix_type, "LA"); parse_command_line(argc, argv, matrix_type, &n, &panel_size, &relax, &nrhs, &maxsuper, &rowblk, &colblk, &lwork, &u, &fp); if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { fprintf(stderr, "expert: cannot allocate %d bytes\n", lwork); exit (-1); } } /* Set the default input options. */ set_default_options(&options); options.DiagPivotThresh = u; options.PrintStat = NO; options.PivotGrowth = YES; options.ConditionNumber = YES; options.IterRefine = SLU_DOUBLE; if ( strcmp(matrix_type, "LA") == 0 ) { /* Test LAPACK matrix suite. */ m = n; lda = SUPERLU_MAX(n, 1); nnz = n * n; /* upper bound */ fimat = 1; nimat = NTYPES; Afull = doubleCalloc(lda * n); dallocateA(n, nnz, &a, &asub, &xa); } else { /* Read a sparse matrix */ fimat = nimat = 0; dreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); } dallocateA(n, nnz, &a_save, &asub_save, &xa_save); rhsb = doubleMalloc(m * nrhs); bsav = doubleMalloc(m * nrhs); solx = doubleMalloc(n * nrhs); ldb = m; ldx = n; dCreate_Dense_Matrix(&B, m, nrhs, rhsb, ldb, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&X, n, nrhs, solx, ldx, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); etree = intMalloc(n); perm_r = intMalloc(n); perm_c = intMalloc(n); pc_save = intMalloc(n); R = (double *) SUPERLU_MALLOC(m*sizeof(double)); C = (double *) SUPERLU_MALLOC(n*sizeof(double)); ferr = (double *) SUPERLU_MALLOC(nrhs*sizeof(double)); berr = (double *) SUPERLU_MALLOC(nrhs*sizeof(double)); j = SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs); rwork = (double *) SUPERLU_MALLOC(j*sizeof(double)); for (i = 0; i < j; ++i) rwork[i] = 0.; if ( !R ) ABORT("SUPERLU_MALLOC fails for R"); if ( !C ) ABORT("SUPERLU_MALLOC fails for C"); if ( !ferr ) ABORT("SUPERLU_MALLOC fails for ferr"); if ( !berr ) ABORT("SUPERLU_MALLOC fails for berr"); if ( !rwork ) ABORT("SUPERLU_MALLOC fails for rwork"); wwork = doubleCalloc( SUPERLU_MAX(m,n) * SUPERLU_MAX(4,nrhs) ); for (i = 0; i < n; ++i) perm_c[i] = pc_save[i] = i; options.ColPerm = MY_PERMC; for (imat = fimat; imat <= nimat; ++imat) { /* All matrix types */ if ( imat ) { /* Skip types 5, 6, or 7 if the matrix size is too small. */ zerot = (imat >= 5 && imat <= 7); if ( zerot && n < imat-4 ) continue; /* Set up parameters with DLATB4 and generate a test matrix with DLATMS. */ dlatb4_slu(path, &imat, &n, &n, sym, &kl, &ku, &anorm, &mode, &cndnum, dist); dlatms_slu(&n, &n, dist, iseed, sym, &rwork[0], &mode, &cndnum, &anorm, &kl, &ku, "No packing", Afull, &lda, &wwork[0], &info); if ( info ) { printf(FMT3, "DLATMS", info, izero, n, nrhs, imat, nfail); continue; } /* For types 5-7, zero one or more columns of the matrix to test that INFO is returned correctly. */ if ( zerot ) { if ( imat == 5 ) izero = 1; else if ( imat == 6 ) izero = n; else izero = n / 2 + 1; ioff = (izero - 1) * lda; if ( imat < 7 ) { for (i = 0; i < n; ++i) Afull[ioff + i] = zero; } else { for (j = 0; j < n - izero + 1; ++j) for (i = 0; i < n; ++i) Afull[ioff + i + j*lda] = zero; } } else { izero = 0; } /* Convert to sparse representation. */ sp_dconvert(n, n, Afull, lda, kl, ku, a, asub, xa, &nnz); } else { izero = 0; zerot = 0; } dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); /* Save a copy of matrix A in ASAV */ dCreate_CompCol_Matrix(&ASAV, m, n, nnz, a_save, asub_save, xa_save, SLU_NC, SLU_D, SLU_GE); dCopy_CompCol_Matrix(&A, &ASAV); /* Form exact solution. */ dGenXtrue(n, nrhs, xact, ldx); StatInit(&stat); for (iequed = 0; iequed < 4; ++iequed) { *equed = equeds[iequed]; if (iequed == 0) nfact = 4; else nfact = 1; /* Only test factored, pre-equilibrated matrix */ for (ifact = 0; ifact < nfact; ++ifact) { fact = facts[ifact]; options.Fact = fact; for (equil = 0; equil < 2; ++equil) { options.Equil = equil; prefact = ( options.Fact == FACTORED || options.Fact == SamePattern_SameRowPerm ); /* Need a first factor */ nofact = (options.Fact != FACTORED); /* Not factored */ /* Restore the matrix A. */ dCopy_CompCol_Matrix(&ASAV, &A); if ( zerot ) { if ( prefact ) continue; } else if ( options.Fact == FACTORED ) { if ( equil || iequed ) { /* Compute row and column scale factors to equilibrate matrix A. */ dgsequ(&A, R, C, &rowcnd, &colcnd, &amax, &info); /* Force equilibration. */ if ( !info && n > 0 ) { if ( strncmp(equed, "R", 1)==0 ) { rowcnd = 0.; colcnd = 1.; } else if ( strncmp(equed, "C", 1)==0 ) { rowcnd = 1.; colcnd = 0.; } else if ( strncmp(equed, "B", 1)==0 ) { rowcnd = 0.; colcnd = 0.; } } /* Equilibrate the matrix. */ dlaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } } if ( prefact ) { /* Need a factor for the first time */ /* Save Fact option. */ fact = options.Fact; options.Fact = DOFACT; /* Preorder the matrix, obtain the column etree. */ sp_preorder(&options, &A, perm_c, etree, &AC); /* Factor the matrix AC. */ dgstrf(&options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, &L, &U, &Glu, &stat, &info); if ( info ) { printf("** First factor: info %d, equed %c\n", info, *equed); if ( lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); exit(0); } } Destroy_CompCol_Permuted(&AC); /* Restore Fact option. */ options.Fact = fact; } /* if .. first time factor */ for (itran = 0; itran < NTRAN; ++itran) { trans = transs[itran]; options.Trans = trans; /* Restore the matrix A. */ dCopy_CompCol_Matrix(&ASAV, &A); /* Set the right hand side. */ dFillRHS(trans, nrhs, xact, ldx, &A, &B); dCopy_Dense_Matrix(m, nrhs, rhsb, ldb, bsav, ldb); /*---------------- * Test dgssv *----------------*/ if ( options.Fact == DOFACT && itran == 0) { /* Not yet factored, and untransposed */ dCopy_Dense_Matrix(m, nrhs, rhsb, ldb, solx, ldx); dgssv(&options, &A, perm_c, perm_r, &L, &U, &X, &stat, &info); if ( info && info != izero ) { printf(FMT3, "dgssv", info, izero, n, nrhs, imat, nfail); } else { /* Reconstruct matrix from factors and compute residual. */ dgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); nt = 1; if ( izero == 0 ) { /* Compute residual of the computed solution. */ dCopy_Dense_Matrix(m, nrhs, rhsb, ldb, wwork, ldb); dgst02(trans, m, n, nrhs, &A, solx, ldx, wwork,ldb, &result[1]); nt = 2; } /* Print information about the tests that did not pass the threshold. */ for (i = 0; i < nt; ++i) { if ( result[i] >= THRESH ) { printf(FMT1, "dgssv", n, i, result[i]); ++nfail; } } nrun += nt; } /* else .. info == 0 */ /* Restore perm_c. */ for (i = 0; i < n; ++i) perm_c[i] = pc_save[i]; if (lwork == 0) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* if .. end of testing dgssv */ /*---------------- * Test dgssvx *----------------*/ /* Equilibrate the matrix if fact = FACTORED and equed = 'R', 'C', or 'B'. */ if ( options.Fact == FACTORED && (equil || iequed) && n > 0 ) { dlaqgs(&A, R, C, rowcnd, colcnd, amax, equed); } /* Solve the system and compute the condition number and error bounds using dgssvx. */ dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if ( info && info != izero ) { printf(FMT3, "dgssvx", info, izero, n, nrhs, imat, nfail); if ( lwork == -1 ) { printf("** Estimated memory: %.0f bytes\n", mem_usage.total_needed); exit(0); } } else { if ( !prefact ) { /* Reconstruct matrix from factors and compute residual. */ dgst01(m, n, &A, &L, &U, perm_c, perm_r, &result[0]); k1 = 0; } else { k1 = 1; } if ( !info ) { /* Compute residual of the computed solution.*/ dCopy_Dense_Matrix(m, nrhs, bsav, ldb, wwork, ldb); dgst02(trans, m, n, nrhs, &ASAV, solx, ldx, wwork, ldb, &result[1]); /* Check solution from generated exact solution. */ dgst04(n, nrhs, solx, ldx, xact, ldx, rcond, &result[2]); /* Check the error bounds from iterative refinement. */ dgst07(trans, n, nrhs, &ASAV, bsav, ldb, solx, ldx, xact, ldx, ferr, berr, &result[3]); /* Print information about the tests that did not pass the threshold. */ for (i = k1; i < NTESTS; ++i) { if ( result[i] >= THRESH ) { printf(FMT2, "dgssvx", options.Fact, trans, *equed, n, imat, i, result[i]); ++nfail; } } nrun += NTESTS; } /* if .. info == 0 */ } /* else .. end of testing dgssvx */ } /* for itran ... */ if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } } /* for equil ... */ } /* for ifact ... */ } /* for iequed ... */ #if 0 if ( !info ) { PrintPerf(&L, &U, &mem_usage, rpg, rcond, ferr, berr, equed); } #endif Destroy_SuperMatrix_Store(&A); Destroy_SuperMatrix_Store(&ASAV); StatFree(&stat); } /* for imat ... */ /* Print a summary of the results. */ PrintSumm("DGE", nfail, nrun, nerrs); if ( strcmp(matrix_type, "LA") == 0 ) SUPERLU_FREE (Afull); SUPERLU_FREE (rhsb); SUPERLU_FREE (bsav); SUPERLU_FREE (solx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (pc_save); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); SUPERLU_FREE (rwork); SUPERLU_FREE (wwork); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); #if 0 Destroy_CompCol_Matrix(&A); Destroy_CompCol_Matrix(&ASAV); #else SUPERLU_FREE(a); SUPERLU_FREE(asub); SUPERLU_FREE(xa); SUPERLU_FREE(a_save); SUPERLU_FREE(asub_save); SUPERLU_FREE(xa_save); #endif if ( lwork > 0 ) { SUPERLU_FREE (work); Destroy_SuperMatrix_Store(&L); Destroy_SuperMatrix_Store(&U); } return 0; }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_D0 runs a small 5 by 5 example of the use of SUPER_LU. Modified: 23 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. */ { double *a; SuperMatrix A; int *asub; SuperMatrix B; int i; int info; SuperMatrix L; int m; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; int permc_spec; double *rhs; double sol[5]; SuperLUStat_t stat; SuperMatrix U; int *xa; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Simple 5 by 5 example of SUPER_LU solver.\n" ); /* Initialize parameters. */ m = 5; n = 5; nnz = 12; /* Set aside space for the arrays. */ a = doubleMalloc ( nnz ); if ( !a ) { ABORT ( "Malloc fails for a[]." ); } asub = intMalloc ( nnz ); if ( !asub ) { ABORT ( "Malloc fails for asub[]." ); } xa = intMalloc ( n+1 ); if ( !xa ) { ABORT ( "Malloc fails for xa[]." ); } /* Initialize matrix A. */ a[0] = 19.0; a[1] = 12.0; a[2] = 12.0; a[3] = 21.0; a[4] = 12.0; a[5] = 12.0; a[6] = 21.0; a[7] = 16.0; a[8] = 21.0; a[9] = 5.0; a[10]= 21.0; a[11]= 18.0; asub[0] = 0; asub[1] = 1; asub[2] = 4; asub[3] = 1; asub[4] = 2; asub[5] = 4; asub[6] = 0; asub[7] = 2; asub[8] = 0; asub[9] = 3; asub[10]= 3; asub[11]= 4; xa[0] = 0; xa[1] = 3; xa[2] = 6; xa[3] = 8; xa[4] = 10; xa[5] = 12; sol[0] = -0.031250000; sol[1] = 0.065476190; sol[2] = 0.013392857; sol[3] = 0.062500000; sol[4] = 0.032738095; /* Create matrix A in the format expected by SuperLU. */ dCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE ); /* Create the right-hand side matrix B. */ nrhs = 1; rhs = doubleMalloc ( m * nrhs ); if ( !rhs ) { ABORT("Malloc fails for rhs[]."); } for ( i = 0; i < m; i++ ) { rhs[i] = 1.0; } dCreate_Dense_Matrix ( &B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE ); /* Set up the arrays for the permutations. */ perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } /* Set the default input options, and then adjust some of them. */ set_default_options ( &options ); options.ColPerm = NATURAL; /* Initialize the statistics variables. */ StatInit ( &stat ); /* Factor the matrix and solve the linear system. */ dgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); /* Print some of the results. */ dPrint_CompCol_Matrix ( "Matrix A", &A ); dPrint_SuperNode_Matrix ( "Factor L", &L ); dPrint_CompCol_Matrix ( "Factor U", &U ); dPrint_Dense_Matrix ( "Solution X", &B ); printf ( "\n" ); printf ( " The exact solution:\n" ); printf ( "\n" ); for ( i = 0; i < n; i++ ) { printf ( "%d %f\n", i, sol[i] ); } printf ( "\n" ); print_int_vec ( "perm_r", m, perm_r ); /* De-allocate storage. */ SUPERLU_FREE ( rhs ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); StatFree ( &stat ); printf ( "\n" ); printf ( "SUPER_LU_D0:\n" ); printf ( " Normal end of execution.\n" ); return 0; }
void dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info ) { /* * Purpose * ======= * * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. If options->Equil = YES, scaling factors are computed to * equilibrate the system: * options->Trans = NOTRANS: * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * options->Trans = TRANS: * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * options->Trans = CONJ: * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B * (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans * = TRANS or CONJ). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation * matrix that usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 1.3. If options->Fact != FACTORED, the LU decomposition is used to * factor the matrix A (after equilibration if options->Equil = YES) * as Pr*A*Pc = L*U, with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form of * A is used to estimate the condition number of the matrix A. If * the reciprocal of the condition number is less than machine * precision, info = A->ncol+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. If options->IterRefine != NOREFINE, iterative refinement is * applied to improve the computed solution matrix and calculate * error bounds and backward error estimates for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if options->Trans = NOTRANS) or diag(R) * (if options->Trans = TRANS or CONJ) so that it solves the * original system before equilibration. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm * to the transpose of A: * * 2.1. If options->Equil = YES, scaling factors are computed to * equilibrate the system: * options->Trans = NOTRANS: * diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B * options->Trans = TRANS: * (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * options->Trans = CONJ: * (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans='N') or diag(C)*B (if trans = 'T' or 'C'). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see sp_preorder.c. * * 2.3. If options->Fact != FACTORED, the LU decomposition is used to * factor the transpose(A) (after equilibration if * options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the * permutation Pr determined by partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the * routine returns with info = i. Otherwise, the factored form * of transpose(A) is used to estimate the condition number of the * matrix A. If the reciprocal of the condition number * is less than machine precision, info = A->nrow+1 is returned as * a warning, but the routine still goes on to solve for X and * computes error bounds as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. If options->IterRefine != NOREFINE, iterative refinement is * applied to improve the computed solution matrix and calculate * error bounds and backward error estimates for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if options->Trans = NOTRANS) or diag(R) * (if options->Trans = TRANS or CONJ) so that it solves the * original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * options (input) superlu_options_t* * The structure defines the input parameters to control * how the LU decomposition will be performed and how the * system will be solved. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of the linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE. * In the future, more general A may be handled. * * On entry, If options->Fact = FACTORED and equed is not 'N', * then A must have been equilibrated by the scaling factors in * R and/or C. * On exit, A is not modified if options->Equil = NO, or if * options->Equil = YES but equed = 'N' on exit. * Otherwise, if options->Equil = YES and equed is not 'N', * A is scaled as follows: * If A->Stype = SLU_NC: * equed = 'R': A := diag(R) * A * equed = 'C': A := A * diag(C) * equed = 'B': A := diag(R) * A * diag(C). * If A->Stype = SLU_NR: * equed = 'R': transpose(A) := diag(R) * transpose(A) * equed = 'C': transpose(A) := transpose(A) * diag(C) * equed = 'B': transpose(A) := diag(R) * transpose(A) * diag(C). * * perm_c (input/output) int* * If A->Stype = SLU_NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = SLU_NR, column permutation vector of size A->nrow, * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If options->Fact = SamePattern_SameRowPerm, the pivoting routine * will try to use the input perm_r, unless a certain threshold * criterion is violated. In that case, perm_r is overwritten by a * new permutation determined by partial pivoting or diagonal * threshold pivoting. * Otherwise, perm_r is output argument. * * etree (input/output) int*, dimension (A->ncol) * Elimination tree of Pc'*A'*A*Pc. * If options->Fact != FACTORED and options->Fact != DOFACT, * etree is an input argument, otherwise it is an output argument. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * * equed (input/output) char* * Specifies the form of equilibration that was done. * = 'N': No equilibration. * = 'R': Row equilibration, i.e., A was premultiplied by diag(R). * = 'C': Column equilibration, i.e., A was postmultiplied by diag(C). * = 'B': Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If options->Fact = FACTORED, equed is an input argument, * otherwise it is an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A) * (if A->Stype = SLU_NR) is multiplied on the left by diag(R). * If equed = 'N' or 'C', R is not accessed. * If options->Fact = FACTORED, R is an input argument, * otherwise, R is output. * If options->zFact = FACTORED and equed = 'R' or 'B', each element * of R must be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A) * (if A->Stype = SLU_NR) is multiplied on the right by diag(C). * If equed = 'N' or 'R', C is not accessed. * If options->Fact = FACTORED, C is an input argument, * otherwise, C is output. * If options->Fact = FACTORED and equed = 'C' or 'B', each element * of C must be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype SLU_= NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU. * * work (workspace/output) void*, size (lwork) (in bytes) * User supplied workspace, should be large enough * to hold data structures for factors L and U. * On exit, if fact is not 'F', L and U point to this array. * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * mem_usage->total_needed; no other side effects. * * See argument 'mem_usage' for memory usage statistics. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * If B->ncol = 0, only LU decomposition is performed, the triangular * solve is skipped. * On exit, * if equed = 'N', B is not modified; otherwise * if A->Stype = SLU_NC: * if options->Trans = NOTRANS and equed = 'R' or 'B', * B is overwritten by diag(R)*B; * if options->Trans = TRANS or CONJ and equed = 'C' of 'B', * B is overwritten by diag(C)*B; * if A->Stype = SLU_NR: * if options->Trans = NOTRANS and equed = 'C' or 'B', * B is overwritten by diag(C)*B; * if options->Trans = TRANS or CONJ and equed = 'R' of 'B', * B is overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not 'N', and the solution to the equilibrated * system is inv(diag(C))*X if options->Trans = NOTRANS and * equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C' * and equed = 'R' or 'B'. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ). * The infinity norm is used. If recip_pivot_growth is much less * than 1, the stability of the LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * FERR (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * If options->IterRefine = NOREFINE, ferr = 1.0. * * BERR (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * If options->IterRefine = NOREFINE, berr = 1.0. * * mem_usage (output) mem_usage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ, permc_spec; trans_t trant; char norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double drop_tol; double t0; /* temporary time */ double *utime; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double hypre_F90_NAME_LAPACK(dlamch,DLAMCH)(const char *); Bstore = (DNformat*) B->Store; Xstore = (DNformat*) X->Store; Bmat = ( double*) Bstore->nzval; Xmat = ( double*) Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; *info = 0; nofact = (options->Fact != FACTORED); equil = (options->Equil == YES); notran = (options->Trans == NOTRANS); if ( nofact ) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = superlu_lsame(equed, "R") || superlu_lsame(equed, "B"); colequ = superlu_lsame(equed, "C") || superlu_lsame(equed, "B"); smlnum = hypre_F90_NAME_LAPACK(dlamch,DLAMCH)("Safe minimum"); bignum = 1. / smlnum; } #if 0 printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n", options->Fact, options->Trans, *equed); #endif /* Test the input parameters */ if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern && options->Fact != SamePattern_SameRowPerm && !notran && options->Trans != TRANS && options->Trans != CONJ && !equil && options->Equil != NO) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if (options->Fact == FACTORED && !(rowequ || colequ || superlu_lsame(equed, "N"))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -12; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -13; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || (B->ncol != 0 && B->ncol != X->ncol) || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -14; } } if (*info != 0) { i = -(*info); superlu_xerbla("dgssvx", &i); return; } /* Initialization for factor parameters */ panel_size = sp_ienv(1); relax = sp_ienv(2); drop_tol = 0.0; utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = (NRformat*) A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, (double*) Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == SLU_NC */ trant = options->Trans; AA = A; } if ( nofact && equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = superlu_lsame(equed, "R") || superlu_lsame(equed, "B"); colequ = superlu_lsame(equed, "C") || superlu_lsame(equed, "B"); } utime[EQUIL] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Scale the right hand side if equilibration was performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } } if ( nofact ) { t0 = SuperLU_timer_(); /* * Gnet column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t0; t0 = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); */ /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgstrf(options, &AC, drop_tol, relax, panel_size, etree, work, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } } if ( options->PivotGrowth ) { if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } return; } /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); } if ( options->ConditionNumber ) { /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, stat, info); utime[RCOND] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_c, perm_r, X, stat, info); utime[SOLVE] = SuperLU_timer_() - t0; /* Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. */ t0 = SuperLU_timer_(); if ( options->IterRefine != NOREFINE ) { dgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B, X, ferr, berr, stat, info); } else { for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0; } utime[REFINE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } } /* end if nrhs > 0 */ if ( options->ConditionNumber ) { /* Set INFO = A->ncol+1 if the matrix is singular to working precision. */ if (*rcond < hypre_F90_NAME_LAPACK(dlamch,DLAMCH)("E")) *info=A->ncol+1; } if ( nofact ) { dQuerySpace(L, U, mem_usage); Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
void dgssv(SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * Purpose * ======= * * DGSSV solves the system of linear equations A*X=B, using the * LU factorization from DGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = SLU_NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc * is a permutation matrix. For more details of this step, * see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = SLU_NR), apply the * above algorithm to the transpose of A: * * 2.1. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number * of linear equations is A->nrow. Currently, the type of A can be: * Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE. * In the future, more general A may be handled. * * perm_c (input/output) int* * If A->Stype = SLU_NC, column permutation vector of size A->ncol * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = SLU_NR, column permutation vector of size A->nrow * which describes permutation of columns of transpose(A) * (rows of A) as described above. * * perm_r (output) int* * If A->Stype = SLU_NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = SLU_NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = SLU_NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = SLU_NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ double t1; /* Temporary time */ char refact[1], trans[1]; DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ double diag_pivot_thresh = 1.0; double drop_tol = 0; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ double *utime; extern SuperLUStat_t SuperLUStat; /* Test the input parameters ... */ *info = 0; Bstore = B->Store; if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -1; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -6; if ( *info != 0 ) { i = -(*info); xerbla_("dgssv", &i); return; } *refact = 'N'; *trans = 'N'; panel_size = sp_ienv(1); relax = sp_ienv(2); StatInit(panel_size, relax); utime = SuperLUStat.utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); *trans = 'T'; } else if ( A->Stype == SLU_NC ) AA = A; etree = intMalloc(A->ncol); t1 = SuperLU_timer_(); sp_preorder(refact, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t1; /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t1 = SuperLU_timer_(); /* Compute the LU factorization of A. */ dgstrf(refact, &AC, diag_pivot_thresh, drop_tol, relax, panel_size, etree, NULL, lwork, perm_r, perm_c, L, U, info); utime[FACT] = SuperLU_timer_() - t1; t1 = SuperLU_timer_(); if ( *info == 0 ) { /* Solve the system A*X=B, overwriting B with X. */ dgstrs (trans, L, U, perm_r, perm_c, B, info); } utime[SOLVE] = SuperLU_timer_() - t1; SUPERLU_FREE (etree); Destroy_CompCol_Permuted(&AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /*PrintStat( &SuperLUStat );*/ StatFree(); }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; double *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; #if 1 /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); dreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); dreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); dreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } #else /* Read the matrix in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); #endif dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); dgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ dinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("dgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void tlin::createS(SuperMatrix &A, int rows, int cols, int nnz, int *colptr, int *rowind, double *values) { dCreate_CompCol_Matrix(&A, rows, cols, nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); }
void dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol, int relax, int panel_size, int *etree, void *work, int lwork, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperLUStat_t *stat, int *info) { /* * Purpose * ======= * * DGSTRF computes an LU factorization of a general sparse m-by-n * matrix A using partial pivoting with row interchanges. * The factorization has the form * Pr * A = L * U * where Pr is a row permutation matrix, L is lower triangular with unit * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper * triangular (upper trapezoidal if A->nrow < A->ncol). * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * options (input) superlu_options_t* * The structure defines the input parameters to control * how the LU decomposition will be performed. * * A (input) SuperMatrix* * Original matrix A, permuted by columns, of dimension * (A->nrow, A->ncol). The type of A can be: * Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE. * * drop_tol (input) double (NOT IMPLEMENTED) * Drop tolerance parameter. At step j of the Gaussian elimination, * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. * 0 <= drop_tol <= 1. The default value of drop_tol is 0. * * relax (input) int * To control degree of relaxing supernodes. If the number * of nodes (columns) in a subtree of the elimination tree is less * than relax, this subtree is considered as one supernode, * regardless of the row structures of those columns. * * panel_size (input) int * A panel consists of at most panel_size consecutive columns. * * etree (input) int*, dimension (A->ncol) * Elimination tree of A'*A. * Note: etree is a vector of parent pointers for a forest whose * vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol. * On input, the columns of A should be permuted so that the * etree is in a certain postorder. * * work (input/output) void*, size (lwork) (in bytes) * User-supplied work space and space for the output data structures. * Not referenced if lwork = 0; * * lwork (input) int * Specifies the size of work array in bytes. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * *info; no other side effects. * * perm_c (input) int*, dimension (A->ncol) * Column permutation vector, which defines the * permutation matrix Pc; perm_c[i] = j means column i of A is * in position j in A*Pc. * When searching for diagonal, perm_c[*] is applied to the * row subscripts of A, so that diagonal threshold pivoting * can find the diagonal of A, rather than that of A*Pc. * * perm_r (input/output) int*, dimension (A->nrow) * Row permutation vector which defines the permutation matrix Pr, * perm_r[i] = j means row i of A is in position j in Pr*A. * If options->Fact = SamePattern_SameRowPerm, the pivoting routine * will try to use the input perm_r, unless a certain threshold * criterion is violated. In that case, perm_r is overwritten by * a new permutation determined by partial pivoting or diagonal * threshold pivoting. * Otherwise, perm_r is output argument; * * L (output) SuperMatrix* * The factor L from the factorization Pr*A=L*U; use compressed row * subscripts storage for supernodes, i.e., L has type: * Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU. * * U (output) SuperMatrix* * The factor U from the factorization Pr*A*Pc=L*U. Use column-wise * storage scheme, i.e., U has types: Stype = SLU_NC, * Dtype = SLU_D, Mtype = SLU_TRU. * * stat (output) SuperLUStat_t* * Record the statistics on runtime and floating-point operation count. * See util.h for the definition of 'SuperLUStat_t'. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * and division by zero will occur if it is used to solve a * system of equations. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. If lwork = -1, it is * the estimated amount of space needed, plus A->ncol. * * ====================================================================== * * Local Working Arrays: * ====================== * m = number of rows in the matrix * n = number of columns in the matrix * * xprune[0:n-1]: xprune[*] points to locations in subscript * vector lsub[*]. For column i, xprune[i] denotes the point where * structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need * to be traversed for symbolic factorization. * * marker[0:3*m-1]: marker[i] = j means that node i has been * reached when working on column j. * Storage: relative to original row subscripts * NOTE: There are 3 of them: marker/marker1 are used for panel dfs, * see dpanel_dfs.c; marker2 is used for inner-factorization, * see dcolumn_dfs.c. * * parent[0:m-1]: parent vector used during dfs * Storage: relative to new row subscripts * * xplore[0:m-1]: xplore[i] gives the location of the next (dfs) * unexplored neighbor of i in lsub[*] * * segrep[0:nseg-1]: contains the list of supernodal representatives * in topological order of the dfs. A supernode representative is the * last column of a supernode. * The maximum size of segrep[] is n. * * repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a * supernodal representative r, repfnz[r] is the location of the first * nonzero in this segment. It is also used during the dfs: repfnz[r]>0 * indicates the supernode r has been explored. * NOTE: There are W of them, each used for one column of a panel. * * panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below * the panel diagonal. These are filled in during dpanel_dfs(), and are * used later in the inner LU factorization within the panel. * panel_lsub[]/dense[] pair forms the SPA data structure. * NOTE: There are W of them. * * dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values; * NOTE: there are W of them. * * tempv[0:*]: real temporary used for dense numeric kernels; * The size of this array is defined by NUM_TEMPV() in dsp_defs.h. * */ /* Local working arrays */ NCPformat *Astore; int *iperm_r = NULL; /* inverse of perm_r; used when options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *iwork; double *dwork; int *segrep, *repfnz, *parent, *xplore; int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ int *xprune; int *marker; double *dense, *tempv; int *relax_end; double *a; int *asub; int *xa_begin, *xa_end; int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */ /* Local scalars */ fact_t fact = options->Fact; double diag_pivot_thresh = options->DiagPivotThresh; int pivrow; /* pivotal row number in the original matrix A */ int nseg1; /* no of segments in U-column above panel row jcol */ int nseg; /* no of segments in each U-column */ register int jcol; register int kcol; /* end column of a relaxed snode */ register int icol; register int i, k, jj, new_next, iinfo; int m, n, min_mn, jsupno, fsupc, nextlu, nextu; int w_def; /* upper bound on panel width */ int usepr, iperm_r_allocated = 0; int nnzL, nnzU; int *panel_histo = stat->panel_histo; flops_t *ops = stat->ops; iinfo = 0; m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN(m, n); Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; /* Allocate storage common to the factor routines */ *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, L, U, &Glu, &iwork, &dwork); if ( *info ) return; xsup = Glu.xsup; supno = Glu.supno; xlsub = Glu.xlsub; xlusup = Glu.xlusup; xusub = Glu.xusub; SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, &repfnz, &panel_lsub, &xprune, &marker); dSetRWork(m, panel_size, dwork, &dense, &tempv); usepr = (fact == SamePattern_SameRowPerm); if ( usepr ) { /* Compute the inverse of perm_r */ iperm_r = (int *) intMalloc(m); for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; iperm_r_allocated = 1; } iperm_c = (int *) intMalloc(n); for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; /* Identify relaxed snodes */ relax_end = (int *) intMalloc(n); if ( options->SymmetricMode == YES ) { heap_relax_snode(n, etree, relax, marker, relax_end); } else { relax_snode(n, etree, relax, marker, relax_end); } ifill (perm_r, m, EMPTY); ifill (marker, m * NO_MARKER, EMPTY); supno[0] = -1; xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; w_def = panel_size; /* * Work on one "panel" at a time. A panel is one of the following: * (a) a relaxed supernode at the bottom of the etree, or * (b) panel_size contiguous columns, defined by the user */ for (jcol = 0; jcol < min_mn; ) { if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ kcol = relax_end[jcol]; /* end of the relaxed snode */ panel_histo[kcol-jcol+1]++; /* -------------------------------------- * Factorize the relaxed supernode(jcol:kcol) * -------------------------------------- */ /* Determine the union of the row structure of the snode */ if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, xprune, marker, &Glu)) != 0 ) return; nextu = xusub[jcol]; nextlu = xlusup[jcol]; jsupno = supno[jcol]; fsupc = xsup[jsupno]; new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); nzlumax = Glu.nzlumax; while ( new_next > nzlumax ) { if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) ) return; } for (icol = jcol; icol<= kcol; icol++) { xusub[icol+1] = nextu; /* Scatter into SPA dense[*] */ for (k = xa_begin[icol]; k < xa_end[icol]; k++) dense[asub[k]] = a[k]; /* Numeric update within the snode */ dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat); if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r, iperm_r, iperm_c, &pivrow, &Glu, stat)) ) if ( iinfo == 0 ) iinfo = *info; #ifdef DEBUG dprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu); #endif } jcol = icol; } else { /* Work on one panel of panel_size columns */ /* Adjust panel_size so that a panel won't overlap with the next * relaxed snode. */ panel_size = w_def; for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) if ( relax_end[k] != EMPTY ) { panel_size = k - jcol; break; } if ( k == min_mn ) panel_size = min_mn - jcol; panel_histo[panel_size]++; /* symbolic factor on a panel of columns */ dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, dense, panel_lsub, segrep, repfnz, xprune, marker, parent, xplore, &Glu); /* numeric sup-panel updates in topological order */ dpanel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, &Glu, stat); /* Sparse LU within the panel, and below panel diagonal */ for ( jj = jcol; jj < jcol + panel_size; jj++) { k = (jj - jcol) * m; /* column index for w-wide arrays */ nseg = nseg1; /* Begin after all the panel segments */ if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k], segrep, &repfnz[k], xprune, marker, parent, xplore, &Glu)) != 0) return; /* Numeric updates */ if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k], tempv, &segrep[nseg1], &repfnz[k], jcol, &Glu, stat)) != 0) return; /* Copy the U-segments to ucol[*] */ if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k], perm_r, &dense[k], &Glu)) != 0) return; if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r, iperm_r, iperm_c, &pivrow, &Glu, stat)) ) if ( iinfo == 0 ) iinfo = *info; /* Prune columns (0:jj-1) using column jj */ dpruneL(jj, perm_r, pivrow, nseg, segrep, &repfnz[k], xprune, &Glu); /* Reset repfnz[] for this column */ resetrep_col (nseg, segrep, &repfnz[k]); #ifdef DEBUG dprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu); #endif } jcol += panel_size; /* Move to the next panel */ } /* else */ } /* for */ *info = iinfo; if ( m > n ) { k = 0; for (i = 0; i < m; ++i) if ( perm_r[i] == EMPTY ) { perm_r[i] = n + k; ++k; } } countnz(min_mn, xprune, &nnzL, &nnzU, &Glu); fixupL(min_mn, perm_r, &Glu); dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */ if ( fact == SamePattern_SameRowPerm ) { /* L and U structures may have changed due to possibly different pivoting, even though the storage is available. There could also be memory expansions, so the array locations may have changed, */ ((SCformat *)L->Store)->nnz = nnzL; ((SCformat *)L->Store)->nsuper = Glu.supno[n]; ((SCformat *)L->Store)->nzval = Glu.lusup; ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup; ((SCformat *)L->Store)->rowind = Glu.lsub; ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub; ((NCformat *)U->Store)->nnz = nnzU; ((NCformat *)U->Store)->nzval = Glu.ucol; ((NCformat *)U->Store)->rowind = Glu.usub; ((NCformat *)U->Store)->colptr = Glu.xusub; } else { dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno, Glu.xsup, SLU_SC, SLU_D, SLU_TRLU); dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU); } ops[FACT] += ops[TRSV] + ops[GEMV]; if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); SUPERLU_FREE (relax_end); }
PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) { Mat_SuperLU *lu = (Mat_SuperLU*)F->spptr; Mat_SeqAIJ *aa; PetscErrorCode ierr; PetscInt sinfo; PetscReal ferr, berr; NCformat *Ustore; SCformat *Lstore; PetscFunctionBegin; if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */ lu->options.Fact = SamePattern; /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ Destroy_SuperMatrix_Store(&lu->A); if (lu->options.Equil){ ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } if ( lu->lwork >= 0 ) { Destroy_SuperNode_Matrix(&lu->L); Destroy_CompCol_Matrix(&lu->U); lu->options.Fact = SamePattern; } } /* Create the SuperMatrix for lu->A=A^T: Since SuperLU likes column-oriented matrices,we pass it the transpose, and then solve A^T X = B in MatSolve(). */ if (lu->options.Equil){ aa = (Mat_SeqAIJ*)(lu->A_dup)->data; } else { aa = (Mat_SeqAIJ*)(A)->data; } #if defined(PETSC_USE_COMPLEX) zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i, SLU_NC,SLU_Z,SLU_GE); #else dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i, SLU_NC,SLU_D,SLU_GE); #endif /* Numerical factorization */ lu->B.ncol = 0; /* Indicate not to solve the system */ if (F->factortype == MAT_FACTOR_LU){ #if defined(PETSC_USE_COMPLEX) zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #else dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #endif } else if (F->factortype == MAT_FACTOR_ILU){ /* Compute the incomplete factorization, condition number and pivot growth */ #if defined(PETSC_USE_COMPLEX) zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #else dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #endif } else { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); } if ( !sinfo || sinfo == lu->A.ncol+1 ) { if ( lu->options.PivotGrowth ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg); if ( lu->options.ConditionNumber ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond); } else if ( sinfo > 0 ){ if ( lu->lwork == -1 ) { ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol); } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); } else { /* sinfo < 0 */ SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo); } if ( lu->options.PrintStat ) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n"); StatPrint(&lu->stat); Lstore = (SCformat *) lu->L.Store; Ustore = (NCformat *) lu->U.Store; ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %D\n", Lstore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %D\n", Ustore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); } lu->flg = SAME_NONZERO_PATTERN; F->ops->solve = MatSolve_SuperLU; F->ops->solvetranspose = MatSolveTranspose_SuperLU; F->ops->matsolve = MatMatSolve_SuperLU; PetscFunctionReturn(0); }
bool SparseMatrix::solveSLU (Vector& B) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); // Invoke the simple driver pdgssv(numThreads, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &ierr); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; Destroy_SuperMatrix_Store(&Bmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); SuperLUStat_t stat; StatInit(&stat); // Invoke the simple driver dgssv(slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &stat, &ierr); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else factored = true; if (printSLUstat) StatPrint(&stat); StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
main(int argc, char *argv[]) { SuperMatrix A, AC, L, U, B; NCformat *Astore; SCPformat *Lstore; NCPformat *Ustore; superlumt_options_t superlumt_options; pxgstrf_shared_t pxgstrf_shared; pdgstrf_threadarg_t *pdgstrf_threadarg; int nprocs; fact_t fact; trans_t trans; yes_no_t refact, usepr; double u, drop_tol; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ void *work; int info, lwork, nrhs, ldx; int m, n, nnz, permc_spec, panel_size, relax; int i, firstfact; double *rhsb, *xact; Gstat_t Gstat; flops_t flopcnt; void parse_command_line(); /* Default parameters to control factorization. */ nprocs = 1; fact = EQUILIBRATE; trans = NOTRANS; panel_size = sp_ienv(1); relax = sp_ienv(2); u = 1.0; usepr = NO; drop_tol = 0.0; work = NULL; lwork = 0; nrhs = 1; /* Get the number of processes from command line. */ parse_command_line(argc, argv, &nprocs); /* Read the input matrix stored in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); /* Set up the sparse matrix data structure for A. */ dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); if (!(rhsb = doubleMalloc(m * nrhs))) SUPERLU_ABORT("Malloc fails for rhsb[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(trans, nrhs, xact, ldx, &A, &B); if (!(perm_r = intMalloc(m))) SUPERLU_ABORT("Malloc fails for perm_r[]."); if (!(perm_c = intMalloc(n))) SUPERLU_ABORT("Malloc fails for perm_c[]."); /******************************** * THE FIRST TIME FACTORIZATION * ********************************/ /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); /* ------------------------------------------------------------ Get column permutation vector perm_c[], according to permc_spec: permc_spec = 0: natural ordering permc_spec = 1: minimum degree ordering on structure of A'*A permc_spec = 2: minimum degree ordering on structure of A'+A permc_spec = 3: approximate minimum degree for unsymmetric matrices ------------------------------------------------------------*/ permc_spec = 1; get_perm_c(permc_spec, &A, perm_c); /* ------------------------------------------------------------ Initialize the option structure superlumt_options using the user-input parameters; Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ refact= NO; pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax, u, usepr, drop_tol, perm_c, perm_r, work, lwork, &A, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pdgstrf(&superlumt_options, &AC, perm_r, &L, &U, &Gstat, &info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; Gstat.ops[FACT] = flopcnt; /* ------------------------------------------------------------ Solve the system A*X=B, overwriting B with X. ------------------------------------------------------------*/ dgstrs(trans, &L, &U, perm_r, perm_c, &B, &Gstat, &info); printf("\n** Result of sparse LU **\n"); dinf_norm_error(nrhs, &B, xact); /* Check inf. norm of the error */ Destroy_CompCol_Permuted(&AC); /* Free extra arrays in AC. */ /********************************* * THE SUBSEQUENT FACTORIZATIONS * *********************************/ /* ------------------------------------------------------------ Re-initialize statistics variables and options used by the factorization routine pdgstrf(). ------------------------------------------------------------*/ StatInit(n, nprocs, &Gstat); refact= YES; pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax, u, usepr, drop_tol, perm_c, perm_r, work, lwork, &A, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pdgstrf(&superlumt_options, &AC, perm_r, &L, &U, &Gstat, &info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; Gstat.ops[FACT] = flopcnt; /* ------------------------------------------------------------ Re-generate right-hand side B, then solve A*X= B. ------------------------------------------------------------*/ dFillRHS(trans, nrhs, xact, ldx, &A, &B); dgstrs(trans, &L, &U, perm_r, perm_c, &B, &Gstat, &info); /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ pxgstrf_finalize(&superlumt_options, &AC); printf("\n** Result of sparse LU **\n"); dinf_norm_error(nrhs, &B, xact); /* Check inf. norm of the error */ Lstore = (SCPformat *) L.Store; Ustore = (NCPformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); fflush(stdout); SUPERLU_FREE (rhsb); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); if ( lwork >= 0 ) { Destroy_SuperNode_SCP(&L); Destroy_CompCol_NCP(&U); } StatFree(&Gstat); }
bool SparseMatrix::solveSLUx (Vector& B, Real* rcond) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(numThreads); slu->equed = NOEQUIL; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; slu->opts->etree = new int[ncol]; slu->opts->colcnt_h = new int[ncol]; slu->opts->part_super_h = new int[ncol]; memset(slu->opts->colcnt_h, 0, ncol*sizeof(int)); memset(slu->opts->part_super_h, 0, ncol*sizeof(int)); memset(slu->opts->etree, 0, ncol*sizeof(int)); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); } else if (factored) slu->opts->fact = FACTORED; // Re-use previous factorization else slu->opts->refact = YES; // Re-use previous ordering // Create right-hand-side and solution vector(s) Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); Real ferr[nrhs], berr[nrhs]; superlu_memusage_t mem_usage; // Invoke the expert driver pdgssvx(numThreads, slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->equed, slu->R, slu->C, &slu->L, &slu->U, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &ierr); B.swap(X); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->etree = new int[ncol]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side vector and solution vector Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); slu->opts->ConditionNumber = printSLUstat || rcond ? YES : NO; slu->opts->PivotGrowth = printSLUstat ? YES : NO; void* work = 0; int lwork = 0; Real ferr[nrhs], berr[nrhs]; mem_usage_t mem_usage; SuperLUStat_t stat; StatInit(&stat); // Invoke the expert driver #if SUPERLU_VERSION == 5 GlobalLU_t Glu; dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &Glu, &mem_usage, &stat, &ierr); #else dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &stat, &ierr); #endif B.swap(X); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } if (printSLUstat) { StatPrint(&stat); IFEM::cout <<"Reciprocal condition number = "<< slu->rcond <<"\nReciprocal pivot growth = "<< slu->rpg << std::endl; } StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
void SuperLUSolver<double>::create_csc_matrix(SuperMatrix *A, int m, int n, int nnz, SuperLuType<double>::Scalar *nzval, int *rowind, int *colptr, Stype_t stype, Dtype_t dtype, Mtype_t mtype) { dCreate_CompCol_Matrix(A, m, n, nnz, nzval, rowind, colptr, stype, dtype, mtype); }
int main ( ) /******************************************************************************/ /* Purpose: D_SAMPLE_ST tests the SUPERLU solver with a 5x5 double precision real matrix. Discussion: The general (GE) representation of the matrix is: [ 19 0 21 21 0 12 21 0 0 0 0 12 16 0 0 0 0 0 5 21 12 12 0 0 18 ] The (0-based) compressed column (CC) representation of this matrix is: I CC A -- -- -- 0 0 19 1 12 4 12 1 3 21 2 12 4 12 0 6 21 2 16 0 8 21 3 5 3 10 21 4 18 * 12 * The right hand side B and solution X are # B X -- -- ---------- 0 1 -0.03125 1 1 0.0654762 2 1 0.0133929 3 1 0.0625 4 1 0.0327381 Licensing: This code is distributed under the GNU LGPL license. Modified: 18 July 2014 Author: John Burkardt Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide. */ { SuperMatrix A; double *acc; double *b; double *b2; SuperMatrix B; int *ccc; int i; int *icc; int info; int j; SuperMatrix L; int m; int n; int nrhs = 1; int ncc; superlu_options_t options; int *perm_c; int permc_spec; int *perm_r; SuperLUStat_t stat; SuperMatrix U; timestamp ( ); printf ( "\n" ); printf ( "D_SAMPLE_ST:\n" ); printf ( " C version\n" ); printf ( " SUPERLU solves a double precision real linear system.\n" ); printf ( " The matrix is read from a Sparse Triplet (ST) file.\n" ); /* Read the matrix from a file associated with standard input, in sparse triplet (ST) format, into compressed column (CC) format. */ dreadtriple ( &m, &n, &ncc, &acc, &icc, &ccc ); /* Print the matrix. */ cc_print ( m, n, ncc, icc, ccc, acc, " CC Matrix:" ); /* Convert the compressed column (CC) matrix into a SuperMatrix A. */ dCreate_CompCol_Matrix ( &A, m, n, ncc, acc, icc, ccc, SLU_NC, SLU_D, SLU_GE ); /* Create the right-hand side matrix. */ b = ( double * ) malloc ( m * sizeof ( double ) ); for ( i = 0; i < m; i++ ) { b[i] = 1.0; } printf ( "\n" ); printf ( " Right hand side:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b[i] ); } /* Create Super Right Hand Side. */ dCreate_Dense_Matrix ( &B, m, nrhs, b, m, SLU_DN, SLU_D, SLU_GE ); /* Set space for the permutations. */ perm_r = ( int * ) malloc ( m * sizeof ( int ) ); perm_c = ( int * ) malloc ( n * sizeof ( int ) ); /* Set the input options. */ set_default_options ( &options ); options.ColPerm = NATURAL; /* Initialize the statistics variables. */ StatInit ( &stat ); /* Solve the linear system. */ dgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); dPrint_CompCol_Matrix ( ( char * ) "A", &A ); dPrint_CompCol_Matrix ( ( char * ) "U", &U ); dPrint_SuperNode_Matrix ( ( char * ) "L", &L ); print_int_vec ( ( char * ) "\nperm_r", m, perm_r ); /* By some miracle involving addresses, the solution has been put into the B vector. */ printf ( "\n" ); printf ( " Computed solution:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b[i] ); } /* Demonstrate that RHS is really the solution now. Multiply it by the matrix. */ b2 = cc_mv ( m, n, ncc, icc, ccc, acc, b ); printf ( "\n" ); printf ( " Product A*X:\n" ); printf ( "\n" ); for ( i = 0; i < m; i++ ) { printf ( "%g\n", b2[i] ); } /* Free memory. */ free ( b ); free ( b2 ); free ( perm_c ); free ( perm_r ); Destroy_SuperMatrix_Store ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); StatFree ( &stat ); /* Terminate. */ printf ( "\n" ); printf ( "D_SAMPLE_ST:\n" ); printf ( " Normal end of execution.\n" ); printf ( "\n" ); timestamp ( ); return 0; }
int HYPRE_ParCSR_SuperLUSetup(HYPRE_Solver solver, HYPRE_ParCSRMatrix A_csr, HYPRE_ParVector b, HYPRE_ParVector x ) { #ifdef HAVE_SUPERLU int startRow, endRow, nrows, *partition, *AdiagI, *AdiagJ, nnz; int irow, colNum, index, *cscI, *cscJ, jcol, *colLengs; int *etree, permcSpec, lwork, panelSize, relax, info; double *AdiagA, *cscA, diagPivotThresh, dropTol; char refact[1]; hypre_CSRMatrix *Adiag; HYPRE_SuperLU *sluPtr; SuperMatrix sluAmat, auxAmat; superlu_options_t slu_options; SuperLUStat_t slu_stat; /* ---------------------------------------------------------------- */ /* get matrix information */ /* ---------------------------------------------------------------- */ sluPtr = (HYPRE_SuperLU *) solver; assert ( sluPtr != NULL ); HYPRE_ParCSRMatrixGetRowPartitioning( A_csr, &partition ); startRow = partition[0]; endRow = partition[1] - 1; nrows = endRow - startRow + 1; free( partition ); if ( startRow != 0 ) { printf("HYPRE_ParCSR_SuperLUSetup ERROR - start row != 0.\n"); return -1; } /* ---------------------------------------------------------------- */ /* get hypre matrix */ /* ---------------------------------------------------------------- */ Adiag = hypre_ParCSRMatrixDiag((hypre_ParCSRMatrix *) A_csr); AdiagI = hypre_CSRMatrixI(Adiag); AdiagJ = hypre_CSRMatrixJ(Adiag); AdiagA = hypre_CSRMatrixData(Adiag); nnz = AdiagI[nrows]; /* ---------------------------------------------------------------- */ /* convert the csr matrix into csc matrix */ /* ---------------------------------------------------------------- */ colLengs = (int *) malloc(nrows * sizeof(int)); for ( irow = 0; irow < nrows; irow++ ) colLengs[irow] = 0; for ( irow = 0; irow < nrows; irow++ ) for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) colLengs[AdiagJ[jcol]]++; cscJ = (int *) malloc( (nrows+1) * sizeof(int) ); cscI = (int *) malloc( nnz * sizeof(int) ); cscA = (double *) malloc( nnz * sizeof(double) ); cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } for ( irow = 0; irow < nrows; irow++ ) { for ( jcol = AdiagI[irow]; jcol < AdiagI[irow+1]; jcol++ ) { colNum = AdiagJ[jcol]; index = cscJ[colNum]++; cscI[index] = irow; cscA[index] = AdiagA[jcol]; } } cscJ[0] = 0; nnz = 0; for ( jcol = 1; jcol <= nrows; jcol++ ) { nnz += colLengs[jcol-1]; cscJ[jcol] = nnz; } free(colLengs); /* ---------------------------------------------------------------- */ /* create SuperMatrix */ /* ---------------------------------------------------------------- */ dCreate_CompCol_Matrix(&sluAmat,nrows,nrows,cscJ[nrows],cscA,cscI, cscJ, SLU_NC, SLU_D, SLU_GE); etree = (int *) malloc(nrows * sizeof(int)); sluPtr->permC_ = (int *) malloc(nrows * sizeof(int)); sluPtr->permR_ = (int *) malloc(nrows * sizeof(int)); permcSpec = 0; get_perm_c(permcSpec, &sluAmat, sluPtr->permC_); slu_options.Fact = DOFACT; slu_options.SymmetricMode = NO; sp_preorder(&slu_options, &sluAmat, sluPtr->permC_, etree, &auxAmat); diagPivotThresh = 1.0; dropTol = 0.0; panelSize = sp_ienv(1); relax = sp_ienv(2); StatInit(&slu_stat); lwork = 0; slu_options.ColPerm = MY_PERMC; slu_options.DiagPivotThresh = diagPivotThresh; dgstrf(&slu_options, &auxAmat, dropTol, relax, panelSize, etree, NULL, lwork, sluPtr->permC_, sluPtr->permR_, &(sluPtr->SLU_Lmat), &(sluPtr->SLU_Umat), &slu_stat, &info); Destroy_CompCol_Permuted(&auxAmat); Destroy_CompCol_Matrix(&sluAmat); free(etree); sluPtr->factorized_ = 1; StatFree(&slu_stat); return 0; #else printf("HYPRE_ParCSR_SuperLUSetup ERROR - SuperLU not enabled.\n"); *solver = (HYPRE_Solver) NULL; return -1; #endif }
/* Supernodal LU factor related */ static void Create_CompCol_Matrix (SuperMatrix *p1, int p2, int p3, int p4, double *p5, int *p6, int *p7, Stype_t p8, Dtype_t p9, Mtype_t p10) { dCreate_CompCol_Matrix(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10); }
void pdgssvx(int nprocs, superlumt_options_t *superlumt_options, SuperMatrix *A, int *perm_c, int *perm_r, equed_t *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, double *ferr, double *berr, superlu_memusage_t *superlu_memusage, int *info) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * pdgssvx() solves the system of linear equations A*X=B or A'*X=B, using * the LU factorization from dgstrf(). Error bounds on the solution and * a condition estimate are also provided. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS: diag(R)*A*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A is * overwritten by diag(R)*A*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 1.2. Permute columns of A, forming A*Pc, where Pc is a permutation matrix * that usually preserves sparsity. * For more details of this step, see dsp_colorder.c. * * 1.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*A*Pc = L*U, with Pr determined by partial pivoting. * * 1.4. Compute the reciprocal pivot growth factor. * * 1.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of A is used to * estimate the condition number of the matrix A. If the reciprocal of * the condition number is less than machine precision, * info = A->ncol+1 is returned as a warning, but the routine still * goes on to solve for X and computes error bounds as described below. * * 1.6. The system of equations is solved for X using the factored form * of A. * * 1.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 1.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. If fact = EQUILIBRATE, scaling factors are computed to equilibrate * the system: * trans = NOTRANS:diag(R)*A'*diag(C)*inv(diag(C))*X = diag(R)*B * trans = TRANS: (diag(R)*A'*diag(C))**T *inv(diag(R))*X = diag(C)*B * trans = CONJ: (diag(R)*A'*diag(C))**H *inv(diag(R))*X = diag(C)*B * Whether or not the system will be equilibrated depends on the * scaling of the matrix A, but if equilibration is used, A' is * overwritten by diag(R)*A'*diag(C) and B by diag(R)*B * (if trans = NOTRANS) or diag(C)*B (if trans = TRANS or CONJ). * * 2.2. Permute columns of transpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix that * usually preserves sparsity. * For more details of this step, see dsp_colorder.c. * * 2.3. If fact = DOFACT or EQUILIBRATE, the LU decomposition is used to * factor the matrix A (after equilibration if fact = EQUILIBRATE) as * Pr*transpose(A)*Pc = L*U, with the permutation Pr determined by * partial pivoting. * * 2.4. Compute the reciprocal pivot growth factor. * * 2.5. If some U(i,i) = 0, so that U is exactly singular, then the routine * returns with info = i. Otherwise, the factored form of transpose(A) * is used to estimate the condition number of the matrix A. * If the reciprocal of the condition number is less than machine * precision, info = A->nrow+1 is returned as a warning, but the * routine still goes on to solve for X and computes error bounds * as described below. * * 2.6. The system of equations is solved for X using the factored form * of transpose(A). * * 2.7. Iterative refinement is applied to improve the computed solution * matrix and calculate error bounds and backward error estimates * for it. * * 2.8. If equilibration was used, the matrix X is premultiplied by * diag(C) (if trans = NOTRANS) or diag(R) (if trans = TRANS or CONJ) * so that it solves the original system before equilibration. * * See supermatrix.h for the definition of 'SuperMatrix' structure. * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by pdgstrf(). There is a single thread of * control to call pdgstrf(), and all threads spawned by pdgstrf() * are terminated before returning from pdgstrf(). * * superlumt_options (input) superlumt_options_t* * The structure defines the input parameters and data structure * to control how the LU factorization will be performed. * The following fields should be defined for this structure: * * o fact (fact_t) * Specifies whether or not the factored form of the matrix * A is supplied on entry, and if not, whether the matrix A should * be equilibrated before it is factored. * = FACTORED: On entry, L, U, perm_r and perm_c contain the * factored form of A. If equed is not NOEQUIL, the matrix A has * been equilibrated with scaling factors R and C. * A, L, U, perm_r are not modified. * = DOFACT: The matrix A will be factored, and the factors will be * stored in L and U. * = EQUILIBRATE: The matrix A will be equilibrated if necessary, * then factored into L and U. * * o trans (trans_t) * Specifies the form of the system of equations: * = NOTRANS: A * X = B (No transpose) * = TRANS: A**T * X = B (Transpose) * = CONJ: A**H * X = B (Transpose) * * o refact (yes_no_t) * Specifies whether this is first time or subsequent factorization. * = NO: this factorization is treated as the first one; * = YES: it means that a factorization was performed prior to this * one. Therefore, this factorization will re-use some * existing data structures, such as L and U storage, column * elimination tree, and the symbolic information of the * Householder matrix. * * o panel_size (int) * A panel consists of at most panel_size consecutive columns. * * o relax (int) * To control degree of relaxing supernodes. If the number * of nodes (columns) in a subtree of the elimination tree is less * than relax, this subtree is considered as one supernode, * regardless of the row structures of those columns. * * o diag_pivot_thresh (double) * Diagonal pivoting threshold. At step j of the Gaussian * elimination, if * abs(A_jj) >= diag_pivot_thresh * (max_(i>=j) abs(A_ij)), * use A_jj as pivot, else use A_ij with maximum magnitude. * 0 <= diag_pivot_thresh <= 1. The default value is 1, * corresponding to partial pivoting. * * o usepr (yes_no_t) * Whether the pivoting will use perm_r specified by the user. * = YES: use perm_r; perm_r is input, unchanged on exit. * = NO: perm_r is determined by partial pivoting, and is output. * * o drop_tol (double) (NOT IMPLEMENTED) * Drop tolerance parameter. At step j of the Gaussian elimination, * if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij. * 0 <= drop_tol <= 1. The default value of drop_tol is 0, * corresponding to not dropping any entry. * * o work (void*) of size lwork * User-supplied work space and space for the output data structures. * Not referenced if lwork = 0; * * o lwork (int) * Specifies the length of work array. * = 0: allocate space internally by system malloc; * > 0: use user-supplied work array of length lwork in bytes, * returns error if space runs out. * = -1: the routine guesses the amount of space needed without * performing the factorization, and returns it in * superlu_memusage->total_needed; no other side effects. * * A (input/output) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR, Dtype = _D, Mtype = GE. In the future, * more general A will be handled. * * On entry, If superlumt_options->fact = FACTORED and equed is not * NOEQUIL, then A must have been equilibrated by the scaling factors * in R and/or C. On exit, A is not modified * if superlumt_options->fact = FACTORED or DOFACT, or * if superlumt_options->fact = EQUILIBRATE and equed = NOEQUIL. * * On exit, if superlumt_options->fact = EQUILIBRATE and equed is not * NOEQUIL, A is scaled as follows: * If A->Stype = NC: * equed = ROW: A := diag(R) * A * equed = COL: A := A * diag(C) * equed = BOTH: A := diag(R) * A * diag(C). * If A->Stype = NR: * equed = ROW: transpose(A) := diag(R) * transpose(A) * equed = COL: transpose(A) := transpose(A) * diag(C) * equed = BOTH: transpose(A) := diag(R) * transpose(A) * diag(C). * * perm_c (input/output) int* * If A->Stype = NC, Column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype = NR, column permutation vector of size A->nrow, * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (input/output) int* * If A->Stype = NC, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype = NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * If superlumt_options->usepr = NO, perm_r is output argument; * If superlumt_options->usepr = YES, the pivoting routine will try * to use the input perm_r, unless a certain threshold criterion * is violated. In that case, perm_r is overwritten by a new * permutation determined by partial pivoting or diagonal * threshold pivoting. * * equed (input/output) equed_t* * Specifies the form of equilibration that was done. * = NOEQUIL: No equilibration. * = ROW: Row equilibration, i.e., A was premultiplied by diag(R). * = COL: Column equilibration, i.e., A was postmultiplied by diag(C). * = BOTH: Both row and column equilibration, i.e., A was replaced * by diag(R)*A*diag(C). * If superlumt_options->fact = FACTORED, equed is an input argument, * otherwise it is an output argument. * * R (input/output) double*, dimension (A->nrow) * The row scale factors for A or transpose(A). * If equed = ROW or BOTH, A (if A->Stype = NC) or transpose(A) * (if A->Stype = NR) is multiplied on the left by diag(R). * If equed = NOEQUIL or COL, R is not accessed. * If fact = FACTORED, R is an input argument; otherwise, R is output. * If fact = FACTORED and equed = ROW or BOTH, each element of R must * be positive. * * C (input/output) double*, dimension (A->ncol) * The column scale factors for A or transpose(A). * If equed = COL or BOTH, A (if A->Stype = NC) or trnspose(A) * (if A->Stype = NR) is multiplied on the right by diag(C). * If equed = NOEQUIL or ROW, C is not accessed. * If fact = FACTORED, C is an input argument; otherwise, C is output. * If fact = FACTORED and equed = COL or BOTH, each element of C must * be positive. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype = NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype = NR). * Uses column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, * if equed = NOEQUIL, B is not modified; otherwise * if A->Stype = NC: * if trans = NOTRANS and equed = ROW or BOTH, B is overwritten * by diag(R)*B; * if trans = TRANS or CONJ and equed = COL of BOTH, B is * overwritten by diag(C)*B; * if A->Stype = NR: * if trans = NOTRANS and equed = COL or BOTH, B is overwritten * by diag(C)*B; * if trans = TRANS or CONJ and equed = ROW of BOTH, B is * overwritten by diag(R)*B. * * X (output) SuperMatrix* * X has types: Stype = DN, Dtype = _D, Mtype = GE. * If info = 0 or info = A->ncol+1, X contains the solution matrix * to the original system of equations. Note that A and B are modified * on exit if equed is not NOEQUIL, and the solution to the * equilibrated system is inv(diag(C))*X if trans = NOTRANS and * equed = COL or BOTH, or inv(diag(R))*X if trans = TRANS or CONJ * and equed = ROW or BOTH. * * recip_pivot_growth (output) double* * The reciprocal pivot growth factor computed as * max_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) ). * If recip_pivot_growth is much less than 1, the stability of the * LU factorization could be poor. * * rcond (output) double* * The estimate of the reciprocal condition number of the matrix A * after equilibration (if done). If rcond is less than the machine * precision (in particular, if rcond = 0), the matrix is singular * to working precision. This condition is indicated by a return * code of info > 0. * * ferr (output) double*, dimension (B->ncol) * The estimated forward error bound for each solution vector * X(j) (the j-th column of the solution matrix X). * If XTRUE is the true solution corresponding to X(j), FERR(j) * is an estimated upper bound for the magnitude of the largest * element in (X(j) - XTRUE) divided by the magnitude of the * largest element in X(j). The estimate is as reliable as * the estimate for RCOND, and is almost always a slight * overestimate of the true error. * * berr (output) double*, dimension (B->ncol) * The componentwise relative backward error of each solution * vector X(j) (i.e., the smallest relative change in * any element of A or B that makes X(j) an exact solution). * * superlu_memusage (output) superlu_memusage_t* * Record the memory usage statistics, consisting of following fields: * - for_lu (float) * The amount of space used in bytes for L\U data structures. * - total_needed (float) * The amount of space needed in bytes to perform factorization. * - expansions (int) * The number of memory expansions during the LU factorization. * * info (output) int* * = 0: successful exit * < 0: if info = -i, the i-th argument had an illegal value * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly * singular, so the solution and error bounds * could not be computed. * = A->ncol+1: U is nonsingular, but RCOND is less than machine * precision, meaning that the matrix is singular to * working precision. Nevertheless, the solution and * error bounds are computed because there are a number * of situations where the computed solution can be more * accurate than the value of RCOND would suggest. * > A->ncol+1: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ NCformat *Astore; DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, dofact, notran, rowequ; char norm[1]; trans_t trant; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int n, relax, panel_size; Gstat_t Gstat; double t0; /* temporary time */ double *utime; flops_t *ops, flopcnt; /* External functions */ extern double dlangs(char *, SuperMatrix *); extern double dlamch_(char *); Astore = A->Store; Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; n = A->ncol; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; superlumt_options->perm_c = perm_c; superlumt_options->perm_r = perm_r; *info = 0; dofact = (superlumt_options->fact == DOFACT); equil = (superlumt_options->fact == EQUILIBRATE); notran = (superlumt_options->trans == NOTRANS); if (dofact || equil) { *equed = NOEQUIL; rowequ = FALSE; colequ = FALSE; } else { rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); smlnum = dlamch_("Safe minimum"); bignum = 1. / smlnum; } /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ if ( nprocs <= 0 ) *info = -1; else if ( (!dofact && !equil && (superlumt_options->fact != FACTORED)) || (!notran && (superlumt_options->trans != TRANS) && (superlumt_options->trans != CONJ)) || (superlumt_options->refact != YES && superlumt_options->refact != NO) || (superlumt_options->usepr != YES && superlumt_options->usepr != NO) || superlumt_options->lwork < -1 ) *info = -2; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -3; else if ((superlumt_options->fact == FACTORED) && !(rowequ || colequ || (*equed == NOEQUIL))) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -11; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || B->ncol != X->ncol || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -12; } } if (*info != 0) { i = -(*info); xerbla_("pdgssvx", &i); return; } /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ panel_size = superlumt_options->panel_size; relax = superlumt_options->relax; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == NC */ trant = superlumt_options->trans; AA = A; } /* ------------------------------------------------------------ Diagonal scaling to equilibrate the matrix. ------------------------------------------------------------*/ if ( equil ) { t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = (*equed == ROW) || (*equed == BOTH); colequ = (*equed == COL) || (*equed == BOTH); } utime[EQUIL] = SuperLU_timer_() - t0; } /* ------------------------------------------------------------ Scale the right hand side. ------------------------------------------------------------*/ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= R[i]; } } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Bmat[i + j*ldb] *= C[i]; } } /* ------------------------------------------------------------ Perform the LU factorization. ------------------------------------------------------------*/ if ( dofact || equil ) { /* Obtain column etree, the column count (colcnt_h) and supernode partition (part_super_h) for the Householder matrix. */ t0 = SuperLU_timer_(); sp_colorder(AA, perm_c, superlumt_options, &AC); utime[ETREE] = SuperLU_timer_() - t0; #if ( PRNTlevel >= 2 ) printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4)); fflush(stdout); #endif /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); pdgstrf(superlumt_options, &AC, perm_r, L, U, &Gstat, info); utime[FACT] = SuperLU_timer_() - t0; flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; if ( superlumt_options->lwork == -1 ) { superlu_memusage->total_needed = *info - A->ncol; return; } } if ( *info > 0 ) { if ( *info <= A->ncol ) { /* Compute the reciprocal pivot growth factor of the leading rank-deficient *info columns of A. */ *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U); } } else { /* ------------------------------------------------------------ Compute the reciprocal pivot growth factor *recip_pivot_growth. ------------------------------------------------------------*/ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); /* ------------------------------------------------------------ Estimate the reciprocal of the condition number of A. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, info); utime[RCOND] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Compute the solution matrix X. ------------------------------------------------------------*/ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs(trant, L, U, perm_r, perm_c, X, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t0; ops[SOLVE] = ops[TRISOLVE]; /* ------------------------------------------------------------ Use iterative refinement to improve the computed solution and compute error bounds and backward error estimates for it. ------------------------------------------------------------*/ t0 = SuperLU_timer_(); dgsrfs(trant, AA, L, U, perm_r, perm_c, *equed, R, C, B, X, ferr, berr, &Gstat, info); utime[REFINE] = SuperLU_timer_() - t0; /* ------------------------------------------------------------ Transform the solution matrix X to a solution of the original system. ------------------------------------------------------------*/ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } /* Set INFO = A->ncol+1 if the matrix is singular to working precision.*/ if ( *rcond < dlamch_("E") ) *info = A->ncol + 1; } superlu_dQuerySpace(nprocs, L, U, panel_size, superlu_memusage); /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ if ( superlumt_options->refact == NO ) { SUPERLU_FREE(superlumt_options->etree); SUPERLU_FREE(superlumt_options->colcnt_h); SUPERLU_FREE(superlumt_options->part_super_h); } if ( dofact || equil ) { Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif PrintStat(&Gstat); StatFree(&Gstat); }
main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program DLINSOLX2. * * This example illustrates how to use DGSSVX to solve systems repeatedly * with the same sparsity pattern of matrix A. * In this case, the column permutation vector perm_c is computed once. * The following data structures will be reused in the subsequent call to * DGSSVX: perm_c, etree * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, A1, L, U; SuperMatrix B, B1, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; double *a, *a1; int *asub, *xa, *asub1, *xa1; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, j, m, n, nnz; double *rhsb, *rhsb1, *rhsx, *xact; double *R, *C; double *ferr, *berr; double u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("DLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ dreadhb(&m, &n, &nnz, &a, &asub, &xa); if ( !(a1 = doubleMalloc(nnz)) ) ABORT("Malloc fails for a1[]."); if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[]."); if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[]."); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsb1 = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[]."); if ( !(rhsx = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(trans, nrhs, xact, ldx, &A, &B); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m]; if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B ------------------------------------------------------------*/ dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("First system: dgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); Destroy_CompCol_Matrix(&A); Destroy_Dense_Matrix(&B); if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1 ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A. ------------------------------------------------------------*/ options.Fact = SamePattern; StatInit(&stat); /* Initialize the statistics variables. */ dCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_D, SLU_GE); dCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_D, SLU_GE); dgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("\nSecond system: dgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A1); Destroy_Dense_Matrix(&B1); Destroy_Dense_Matrix(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
void RKWidget::step ( const size_t nStep ) { DNformat *Bstore; DNformat *Xstore; double temp; if(unstable) return; samset = false; if( dirty ) { if(aexist) { Destroy_CompCol_Matrix(&A); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&Up); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } // these may be freed in dgssvx or Destroy_CompCol_Matrix I think aexist = false; } a = new double[nvar*N_]; xa = new int[N_+1]; asub = new int[nvar*N_]; updateCoef(method); if(nblock == 1) { // load with coef[] ??? fillA(); } else if (nup+ndn == 0) { // solve seperate independent blocks??? } else { // load block system blockFillA(); } dCreate_CompCol_Matrix(&A, N_, N_, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); aexist = true; /* Initialize the statistics variables. */ StatInit(&stat); dPrint_CompCol_Matrix("A matrix", &A); options.Fact = DOFACT; //options.ColPerm=NATURAL; options.ColPerm=COLAMD; options.PivotGrowth = NO; options.ConditionNumber = NO; /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &Up, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); //dPrint_CompCol_Matrix("A matrix", &A); printf("LU factorization: dgssvx() returns info %d\n", info); if ( info == 0 || info == N_+1 ) { if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); printf("L\\U_ MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ B.ncol = 1; dirty = false; } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); } for ( size_t n = 0; n < nStep; n++ ) { for( int ns = 0; ns < nStage; ns++ ) { ///set B matrix Bstore= (DNformat *)B.Store; rhsb=(double*)Bstore->nzval; if (nup+ndn == 0) { // solve seperate independent blocks??? std::cout << "Discontinuous Galerkin Not Implimented\n"; return; } else { fillB(ns); } ///solve factored system StatInit(&stat); options.Fact = FACTORED; dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &Up, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); //if( n == 1 ) printf("Triangular solve: dgssvx() returns info %d\n", info); ///update U_ if ( info == 0 || info == N_+1 ) { /* This is how you could access the solution matrix. */ Xstore = (DNformat *)X.Store; rhsx = (double*)(Xstore->nzval); for ( size_t i = 0; i < N_ ; i++ ) { if(rhsx[i]> 1e16) unstable = true; b_k[i+N_*ns]=rhsx[i]; } } else { std::cout << "ERROR: Matrix Solution Failed info = " << info << std::endl; } StatFree(&stat); } for(int j=0 ; j<nStage; j++) { temp=b_b[j]; if( temp != 0 ) { for(size_t i = 0; i < N_ ; i++ ) { U_[i] += temp*b_k[i+j*N_]; } } } cStep++; totCFL += CFL; } }
NLboolean nlFactorize_SUPERLU() { /* OpenNL Context */ NLSparseMatrix* M = &(nlCurrentContext->M) ; NLuint n = nlCurrentContext->n ; NLuint nnz = nlSparseMatrixNNZ(M) ; /* Number of Non-Zero coeffs */ superlu_context* context = (superlu_context*)(nlCurrentContext->direct_solver_context) ; if(context == NULL) { nlCurrentContext->direct_solver_context = malloc(sizeof(superlu_context)) ; context = (superlu_context*)(nlCurrentContext->direct_solver_context) ; } /* SUPERLU variables */ NLint info ; SuperMatrix A, AC ; /* Temporary variables */ NLRowColumn* Ci = NULL ; NLuint i,j,count ; /* Sanity checks */ nl_assert(!(M->storage & NL_MATRIX_STORE_SYMMETRIC)) ; nl_assert(M->storage & NL_MATRIX_STORE_ROWS) ; nl_assert(M->m == M->n) ; set_default_options(&(context->options)) ; switch(nlCurrentContext->solver) { case NL_SUPERLU_EXT: { context->options.ColPerm = NATURAL ; } break ; case NL_PERM_SUPERLU_EXT: { context->options.ColPerm = COLAMD ; } break ; case NL_SYMMETRIC_SUPERLU_EXT: { context->options.ColPerm = MMD_AT_PLUS_A ; context->options.SymmetricMode = YES ; } break ; default: { nl_assert_not_reached ; } break ; } StatInit(&(context->stat)) ; /* * Step 1: convert matrix M into SUPERLU compressed column representation * ---------------------------------------------------------------------- */ NLint* xa = NL_NEW_ARRAY(NLint, n+1) ; NLdouble* a = NL_NEW_ARRAY(NLdouble, nnz) ; NLint* asub = NL_NEW_ARRAY(NLint, nnz) ; count = 0 ; for(i = 0; i < n; i++) { Ci = &(M->row[i]) ; xa[i] = count ; for(j = 0; j < Ci->size; j++) { a[count] = Ci->coeff[j].value ; asub[count] = Ci->coeff[j].index ; count++ ; } } xa[n] = nnz ; dCreate_CompCol_Matrix( &A, n, n, nnz, a, asub, xa, SLU_NR, /* Row wise */ SLU_D, /* doubles */ SLU_GE /* general storage */ ); /* * Step 2: factorize matrix * ------------------------ */ context->perm_c = NL_NEW_ARRAY(NLint, n) ; context->perm_r = NL_NEW_ARRAY(NLint, n) ; NLint* etree = NL_NEW_ARRAY(NLint, n) ; get_perm_c(context->options.ColPerm, &A, context->perm_c) ; sp_preorder(&(context->options), &A, context->perm_c, etree, &AC) ; int panel_size = sp_ienv(1) ; int relax = sp_ienv(2) ; dgstrf(&(context->options), &AC, relax, panel_size, etree, NULL, 0, context->perm_c, context->perm_r, &(context->L), &(context->U), &(context->stat), &info) ; /* * Step 3: cleanup * --------------- */ NL_DELETE_ARRAY(xa) ; NL_DELETE_ARRAY(a) ; NL_DELETE_ARRAY(asub) ; NL_DELETE_ARRAY(etree) ; Destroy_SuperMatrix_Store(&A); Destroy_CompCol_Permuted(&AC); StatFree(&(context->stat)); return NL_TRUE ; }
void dgsitrf(superlu_options_t *options, SuperMatrix *A, int relax, int panel_size, int *etree, void *work, int lwork, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu, /* persistent to facilitate multiple factorizations */ SuperLUStat_t *stat, int *info) { /* Local working arrays */ NCPformat *Astore; int *iperm_r = NULL; /* inverse of perm_r; used when options->Fact == SamePattern_SameRowPerm */ int *iperm_c; /* inverse of perm_c */ int *swap, *iswap; /* swap is used to store the row permutation during the factorization. Initially, it is set to iperm_c (row indeces of Pc*A*Pc'). iswap is the inverse of swap. After the factorization, it is equal to perm_r. */ int *iwork; double *dwork; int *segrep, *repfnz, *parent, *xplore; int *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide SPA */ int *marker, *marker_relax; double *dense, *tempv; int *relax_end, *relax_fsupc; double *a; int *asub; int *xa_begin, *xa_end; int *xsup, *supno; int *xlsub, *xlusup, *xusub; int nzlumax; double *amax; double drop_sum; double alpha, omega; /* used in MILU, mimicing DRIC */ double *dwork2; /* used by the second dropping rule */ /* Local scalars */ fact_t fact = options->Fact; double diag_pivot_thresh = options->DiagPivotThresh; double drop_tol = options->ILU_DropTol; /* tau */ double fill_ini = options->ILU_FillTol; /* tau^hat */ double gamma = options->ILU_FillFactor; int drop_rule = options->ILU_DropRule; milu_t milu = options->ILU_MILU; double fill_tol; int pivrow; /* pivotal row number in the original matrix A */ int nseg1; /* no of segments in U-column above panel row jcol */ int nseg; /* no of segments in each U-column */ register int jcol; register int kcol; /* end column of a relaxed snode */ register int icol; register int i, k, jj, new_next, iinfo; int m, n, min_mn, jsupno, fsupc, nextlu, nextu; int w_def; /* upper bound on panel width */ int usepr, iperm_r_allocated = 0; int nnzL, nnzU; int *panel_histo = stat->panel_histo; flops_t *ops = stat->ops; int last_drop;/* the last column which the dropping rules applied */ int quota; int nnzAj; /* number of nonzeros in A(:,1:j) */ int nnzLj, nnzUj; double tol_L = drop_tol, tol_U = drop_tol; double zero = 0.0; double one = 1.0; /* Executable */ iinfo = 0; m = A->nrow; n = A->ncol; min_mn = SUPERLU_MIN(m, n); Astore = A->Store; a = Astore->nzval; asub = Astore->rowind; xa_begin = Astore->colbeg; xa_end = Astore->colend; /* Allocate storage common to the factor routines */ *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz, panel_size, gamma, L, U, Glu, &iwork, &dwork); if ( *info ) return; xsup = Glu->xsup; supno = Glu->supno; xlsub = Glu->xlsub; xlusup = Glu->xlusup; xusub = Glu->xusub; SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore, &repfnz, &panel_lsub, &marker_relax, &marker); dSetRWork(m, panel_size, dwork, &dense, &tempv); usepr = (fact == SamePattern_SameRowPerm); if ( usepr ) { /* Compute the inverse of perm_r */ iperm_r = (int *) intMalloc(m); for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k; iperm_r_allocated = 1; } iperm_c = (int *) intMalloc(n); for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k; swap = (int *)intMalloc(n); for (k = 0; k < n; k++) swap[k] = iperm_c[k]; iswap = (int *)intMalloc(n); for (k = 0; k < n; k++) iswap[k] = perm_c[k]; amax = (double *) doubleMalloc(panel_size); if (drop_rule & DROP_SECONDARY) dwork2 = (double *)doubleMalloc(n); else dwork2 = NULL; nnzAj = 0; nnzLj = 0; nnzUj = 0; last_drop = SUPERLU_MAX(min_mn - 2 * sp_ienv(7), (int)(min_mn * 0.95)); alpha = pow((double)n, -1.0 / options->ILU_MILU_Dim); /* Identify relaxed snodes */ relax_end = (int *) intMalloc(n); relax_fsupc = (int *) intMalloc(n); if ( options->SymmetricMode == YES ) ilu_heap_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); else ilu_relax_snode(n, etree, relax, marker, relax_end, relax_fsupc); ifill (perm_r, m, EMPTY); ifill (marker, m * NO_MARKER, EMPTY); supno[0] = -1; xsup[0] = xlsub[0] = xusub[0] = xlusup[0] = 0; w_def = panel_size; /* Mark the rows used by relaxed supernodes */ ifill (marker_relax, m, EMPTY); i = mark_relax(m, relax_end, relax_fsupc, xa_begin, xa_end, asub, marker_relax); #if ( PRNTlevel >= 1) printf("%d relaxed supernodes.\n", i); #endif /* * Work on one "panel" at a time. A panel is one of the following: * (a) a relaxed supernode at the bottom of the etree, or * (b) panel_size contiguous columns, defined by the user */ for (jcol = 0; jcol < min_mn; ) { if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */ kcol = relax_end[jcol]; /* end of the relaxed snode */ panel_histo[kcol-jcol+1]++; /* Drop small rows in the previous supernode. */ if (jcol > 0 && jcol < last_drop) { int first = xsup[supno[jcol - 1]]; int last = jcol - 1; int quota; /* Compute the quota */ if (drop_rule & DROP_PROWS) quota = gamma * Astore->nnz / m * (m - first) / m * (last - first + 1); else if (drop_rule & DROP_COLUMN) { int i; quota = 0; for (i = first; i <= last; i++) quota += xa_end[i] - xa_begin[i]; quota = gamma * quota * (m - first) / m; } else if (drop_rule & DROP_AREA) quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - nnzLj; else quota = m * n; fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / min_mn); /* Drop small rows */ i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj, &fill_tol, Glu, tempv, dwork2, 0); /* Reset the parameters */ if (drop_rule & DROP_DYNAMIC) { if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) < nnzLj) tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); else tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); } if (fill_tol < 0) iinfo -= (int)fill_tol; #ifdef DEBUG num_drop_L += i * (last - first + 1); #endif } /* -------------------------------------- * Factorize the relaxed supernode(jcol:kcol) * -------------------------------------- */ /* Determine the union of the row structure of the snode */ if ( (*info = ilu_dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end, marker, Glu)) != 0 ) return; nextu = xusub[jcol]; nextlu = xlusup[jcol]; jsupno = supno[jcol]; fsupc = xsup[jsupno]; new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1); nzlumax = Glu->nzlumax; while ( new_next > nzlumax ) { if ((*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))) return; } for (icol = jcol; icol <= kcol; icol++) { xusub[icol+1] = nextu; amax[0] = 0.0; /* Scatter into SPA dense[*] */ for (k = xa_begin[icol]; k < xa_end[icol]; k++) { register double tmp = fabs(a[k]); if (tmp > amax[0]) amax[0] = tmp; dense[asub[k]] = a[k]; } nnzAj += xa_end[icol] - xa_begin[icol]; if (amax[0] == 0.0) { amax[0] = fill_ini; #if ( PRNTlevel >= 1) printf("Column %d is entirely zero!\n", icol); fflush(stdout); #endif } /* Numeric update within the snode */ dsnode_bmod(icol, jsupno, fsupc, dense, tempv, Glu, stat); if (usepr) pivrow = iperm_r[icol]; fill_tol = pow(fill_ini, 1.0 - (double)icol / (double)min_mn); if ( (*info = ilu_dpivotL(icol, diag_pivot_thresh, &usepr, perm_r, iperm_c[icol], swap, iswap, marker_relax, &pivrow, amax[0] * fill_tol, milu, zero, Glu, stat)) ) { iinfo++; marker[pivrow] = kcol; } } jcol = kcol + 1; } else { /* Work on one panel of panel_size columns */ /* Adjust panel_size so that a panel won't overlap with the next * relaxed snode. */ panel_size = w_def; for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) if ( relax_end[k] != EMPTY ) { panel_size = k - jcol; break; } if ( k == min_mn ) panel_size = min_mn - jcol; panel_histo[panel_size]++; /* symbolic factor on a panel of columns */ ilu_dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1, dense, amax, panel_lsub, segrep, repfnz, marker, parent, xplore, Glu); /* numeric sup-panel updates in topological order */ dpanel_bmod(m, panel_size, jcol, nseg1, dense, tempv, segrep, repfnz, Glu, stat); /* Sparse LU within the panel, and below panel diagonal */ for (jj = jcol; jj < jcol + panel_size; jj++) { k = (jj - jcol) * m; /* column index for w-wide arrays */ nseg = nseg1; /* Begin after all the panel segments */ nnzAj += xa_end[jj] - xa_begin[jj]; if ((*info = ilu_dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k], segrep, &repfnz[k], marker, parent, xplore, Glu))) return; /* Numeric updates */ if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k], tempv, &segrep[nseg1], &repfnz[k], jcol, Glu, stat)) != 0) return; /* Make a fill-in position if the column is entirely zero */ if (xlsub[jj + 1] == xlsub[jj]) { register int i, row; int nextl; int nzlmax = Glu->nzlmax; int *lsub = Glu->lsub; int *marker2 = marker + 2 * m; /* Allocate memory */ nextl = xlsub[jj] + 1; if (nextl >= nzlmax) { int error = dLUMemXpand(jj, nextl, LSUB, &nzlmax, Glu); if (error) { *info = error; return; } lsub = Glu->lsub; } xlsub[jj + 1]++; assert(xlusup[jj]==xlusup[jj+1]); xlusup[jj + 1]++; ((double *) Glu->lusup)[xlusup[jj]] = zero; /* Choose a row index (pivrow) for fill-in */ for (i = jj; i < n; i++) if (marker_relax[swap[i]] <= jj) break; row = swap[i]; marker2[row] = jj; lsub[xlsub[jj]] = row; #ifdef DEBUG printf("Fill col %d.\n", jj); fflush(stdout); #endif } /* Computer the quota */ if (drop_rule & DROP_PROWS) quota = gamma * Astore->nnz / m * jj / m; else if (drop_rule & DROP_COLUMN) quota = gamma * (xa_end[jj] - xa_begin[jj]) * (jj + 1) / m; else if (drop_rule & DROP_AREA) quota = gamma * 0.9 * nnzAj * 0.5 - nnzUj; else quota = m; /* Copy the U-segments to ucol[*] and drop small entries */ if ((*info = ilu_dcopy_to_ucol(jj, nseg, segrep, &repfnz[k], perm_r, &dense[k], drop_rule, milu, amax[jj - jcol] * tol_U, quota, &drop_sum, &nnzUj, Glu, dwork2)) != 0) return; /* Reset the dropping threshold if required */ if (drop_rule & DROP_DYNAMIC) { if (gamma * 0.9 * nnzAj * 0.5 < nnzLj) tol_U = SUPERLU_MIN(1.0, tol_U * 2.0); else tol_U = SUPERLU_MAX(drop_tol, tol_U * 0.5); } if (drop_sum != zero) { if (drop_sum > zero) omega = SUPERLU_MIN(2.0 * (1.0 - alpha) * amax[jj - jcol] / drop_sum, one); else omega = SUPERLU_MAX(2.0 * (1.0 - alpha) * amax[jj - jcol] / drop_sum, -one); drop_sum *= omega; } if (usepr) pivrow = iperm_r[jj]; fill_tol = pow(fill_ini, 1.0 - (double)jj / (double)min_mn); if ( (*info = ilu_dpivotL(jj, diag_pivot_thresh, &usepr, perm_r, iperm_c[jj], swap, iswap, marker_relax, &pivrow, amax[jj - jcol] * fill_tol, milu, drop_sum, Glu, stat)) ) { iinfo++; marker[m + pivrow] = jj; marker[2 * m + pivrow] = jj; } /* Reset repfnz[] for this column */ resetrep_col (nseg, segrep, &repfnz[k]); /* Start a new supernode, drop the previous one */ if (jj > 0 && supno[jj] > supno[jj - 1] && jj < last_drop) { int first = xsup[supno[jj - 1]]; int last = jj - 1; int quota; /* Compute the quota */ if (drop_rule & DROP_PROWS) quota = gamma * Astore->nnz / m * (m - first) / m * (last - first + 1); else if (drop_rule & DROP_COLUMN) { int i; quota = 0; for (i = first; i <= last; i++) quota += xa_end[i] - xa_begin[i]; quota = gamma * quota * (m - first) / m; } else if (drop_rule & DROP_AREA) quota = gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) - nnzLj; else quota = m * n; fill_tol = pow(fill_ini, 1.0 - 0.5 * (first + last) / (double)min_mn); /* Drop small rows */ i = ilu_ddrop_row(options, first, last, tol_L, quota, &nnzLj, &fill_tol, Glu, tempv, dwork2, 1); /* Reset the parameters */ if (drop_rule & DROP_DYNAMIC) { if (gamma * nnzAj * (1.0 - 0.5 * (last + 1.0) / m) < nnzLj) tol_L = SUPERLU_MIN(1.0, tol_L * 2.0); else tol_L = SUPERLU_MAX(drop_tol, tol_L * 0.5); } if (fill_tol < 0) iinfo -= (int)fill_tol; #ifdef DEBUG num_drop_L += i * (last - first + 1); #endif } /* if start a new supernode */ } /* for */ jcol += panel_size; /* Move to the next panel */ } /* else */ } /* for */ *info = iinfo; if ( m > n ) { k = 0; for (i = 0; i < m; ++i) if ( perm_r[i] == EMPTY ) { perm_r[i] = n + k; ++k; } } ilu_countnz(min_mn, &nnzL, &nnzU, Glu); fixupL(min_mn, perm_r, Glu); dLUWorkFree(iwork, dwork, Glu); /* Free work space and compress storage */ if ( fact == SamePattern_SameRowPerm ) { /* L and U structures may have changed due to possibly different pivoting, even though the storage is available. There could also be memory expansions, so the array locations may have changed, */ ((SCformat *)L->Store)->nnz = nnzL; ((SCformat *)L->Store)->nsuper = Glu->supno[n]; ((SCformat *)L->Store)->nzval = (double *) Glu->lusup; ((SCformat *)L->Store)->nzval_colptr = Glu->xlusup; ((SCformat *)L->Store)->rowind = Glu->lsub; ((SCformat *)L->Store)->rowind_colptr = Glu->xlsub; ((NCformat *)U->Store)->nnz = nnzU; ((NCformat *)U->Store)->nzval = (double *) Glu->ucol; ((NCformat *)U->Store)->rowind = Glu->usub; ((NCformat *)U->Store)->colptr = Glu->xusub; } else { dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, (double *) Glu->lusup, Glu->xlusup, Glu->lsub, Glu->xlsub, Glu->supno, Glu->xsup, SLU_SC, SLU_D, SLU_TRLU); dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, (double *) Glu->ucol, Glu->usub, Glu->xusub, SLU_NC, SLU_D, SLU_TRU); } ops[FACT] += ops[TRSV] + ops[GEMV]; stat->expansions = --(Glu->num_expansions); if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r); SUPERLU_FREE (iperm_c); SUPERLU_FREE (relax_end); SUPERLU_FREE (swap); SUPERLU_FREE (iswap); SUPERLU_FREE (relax_fsupc); SUPERLU_FREE (amax); if ( dwork2 ) SUPERLU_FREE (dwork2); }
void dgsisx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, int *etree, char *equed, double *R, double *C, SuperMatrix *L, SuperMatrix *U, void *work, int lwork, SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, double *rcond, GlobalLU_t *Glu, mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info) { DNformat *Bstore, *Xstore; double *Bmat, *Xmat; int ldb, ldx, nrhs, n; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int colequ, equil, nofact, notran, rowequ, permc_spec, mc64; trans_t trant; char norm[1]; int i, j, info1; double amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin; int relax, panel_size; double diag_pivot_thresh; double t0; /* temporary time */ double *utime; int *perm = NULL; /* permutation returned from MC64 */ /* External functions */ extern double dlangs(char *, SuperMatrix *); Bstore = B->Store; Xstore = X->Store; Bmat = Bstore->nzval; Xmat = Xstore->nzval; ldb = Bstore->lda; ldx = Xstore->lda; nrhs = B->ncol; n = B->nrow; *info = 0; nofact = (options->Fact != FACTORED); equil = (options->Equil == YES); notran = (options->Trans == NOTRANS); mc64 = (options->RowPerm == LargeDiag); if ( nofact ) { *(unsigned char *)equed = 'N'; rowequ = FALSE; colequ = FALSE; } else { rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0; colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0; smlnum = dmach("Safe minimum"); /* lamch_("Safe minimum"); */ bignum = 1. / smlnum; } /* Test the input parameters */ if (options->Fact != DOFACT && options->Fact != SamePattern && options->Fact != SamePattern_SameRowPerm && options->Fact != FACTORED && options->Trans != NOTRANS && options->Trans != TRANS && options->Trans != CONJ && options->Equil != NO && options->Equil != YES) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( options->Fact == FACTORED && !(rowequ || colequ || strncmp(equed, "N", 1)==0) ) *info = -6; else { if (rowequ) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, R[j]); rcmax = SUPERLU_MAX(rcmax, R[j]); } if (rcmin <= 0.) *info = -7; else if ( A->nrow > 0) rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else rowcnd = 1.; } if (colequ && *info == 0) { rcmin = bignum; rcmax = 0.; for (j = 0; j < A->nrow; ++j) { rcmin = SUPERLU_MIN(rcmin, C[j]); rcmax = SUPERLU_MAX(rcmax, C[j]); } if (rcmin <= 0.) *info = -8; else if (A->nrow > 0) colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum); else colcnd = 1.; } if (*info == 0) { if ( lwork < -1 ) *info = -12; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -13; else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) || (B->ncol != 0 && B->ncol != X->ncol) || X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE ) *info = -14; } } if (*info != 0) { i = -(*info); input_error("dgsisx", &i); return; } /* Initialization for factor parameters */ panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = options->DiagPivotThresh; utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); if ( notran ) { /* Reverse the transpose argument. */ trant = TRANS; notran = 0; } else { trant = NOTRANS; notran = 1; } } else { /* A->Stype == SLU_NC */ trant = options->Trans; AA = A; } if ( nofact ) { register int i, j; NCformat *Astore = AA->Store; int nnz = Astore->nnz; int *colptr = Astore->colptr; int *rowind = Astore->rowind; double *nzval = (double *)Astore->nzval; if ( mc64 ) { t0 = SuperLU_timer_(); if ((perm = intMalloc(n)) == NULL) ABORT("SUPERLU_MALLOC fails for perm[]"); info1 = dldperm(5, n, nnz, colptr, rowind, nzval, perm, R, C); if (info1 != 0) { /* MC64 fails, call dgsequ() later */ mc64 = 0; SUPERLU_FREE(perm); perm = NULL; } else { if ( equil ) { rowequ = colequ = 1; for (i = 0; i < n; i++) { R[i] = exp(R[i]); C[i] = exp(C[i]); } /* scale the matrix */ for (j = 0; j < n; j++) { for (i = colptr[j]; i < colptr[j + 1]; i++) { nzval[i] *= R[rowind[i]] * C[j]; } } *equed = 'B'; } /* permute the matrix */ for (j = 0; j < n; j++) { for (i = colptr[j]; i < colptr[j + 1]; i++) { /*nzval[i] *= R[rowind[i]] * C[j];*/ rowind[i] = perm[rowind[i]]; } } } utime[EQUIL] = SuperLU_timer_() - t0; } if ( !mc64 & equil ) { /* Only perform equilibration, no row perm */ t0 = SuperLU_timer_(); /* Compute row and column scalings to equilibrate the matrix A. */ dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1); if ( info1 == 0 ) { /* Equilibrate matrix A. */ dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed); rowequ = strncmp(equed, "R", 1)==0 || strncmp(equed, "B", 1)==0; colequ = strncmp(equed, "C", 1)==0 || strncmp(equed, "B", 1)==0; } utime[EQUIL] = SuperLU_timer_() - t0; } } if ( nofact ) { t0 = SuperLU_timer_(); /* * Gnet column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t0; t0 = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t0; /* Compute the LU factorization of A*Pc. */ t0 = SuperLU_timer_(); dgsitrf(options, &AC, relax, panel_size, etree, work, lwork, perm_c, perm_r, L, U, Glu, stat, info); utime[FACT] = SuperLU_timer_() - t0; if ( lwork == -1 ) { mem_usage->total_needed = *info - A->ncol; return; } if ( mc64 ) { /* Fold MC64's perm[] into perm_r[]. */ NCformat *Astore = AA->Store; int nnz = Astore->nnz, *rowind = Astore->rowind; int *perm_tmp, *iperm; if ((perm_tmp = intMalloc(2*n)) == NULL) ABORT("SUPERLU_MALLOC fails for perm_tmp[]"); iperm = perm_tmp + n; for (i = 0; i < n; ++i) perm_tmp[i] = perm_r[perm[i]]; for (i = 0; i < n; ++i) { perm_r[i] = perm_tmp[i]; iperm[perm[i]] = i; } /* Restore A's original row indices. */ for (i = 0; i < nnz; ++i) rowind[i] = iperm[rowind[i]]; SUPERLU_FREE(perm); /* MC64 permutation */ SUPERLU_FREE(perm_tmp); } } if ( options->PivotGrowth ) { if ( *info > 0 ) return; /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */ *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U); } if ( options->ConditionNumber ) { /* Estimate the reciprocal of the condition number of A. */ t0 = SuperLU_timer_(); if ( notran ) { *(unsigned char *)norm = '1'; } else { *(unsigned char *)norm = 'I'; } anorm = dlangs(norm, AA); dgscon(norm, L, U, anorm, rcond, stat, &info1); utime[RCOND] = SuperLU_timer_() - t0; } if ( nrhs > 0 ) { /* Solve the system */ double *rhs_work; /* Scale and permute the right-hand side if equilibration and permutation from MC64 were performed. */ if ( notran ) { if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) Bmat[i + j*ldb] *= R[i]; } } else if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { Bmat[i + j*ldb] *= C[i]; } } /* Compute the solution matrix X. */ for (j = 0; j < nrhs; j++) /* Save a copy of the right hand sides */ for (i = 0; i < B->nrow; i++) Xmat[i + j*ldx] = Bmat[i + j*ldb]; t0 = SuperLU_timer_(); dgstrs (trant, L, U, perm_c, perm_r, X, stat, &info1); utime[SOLVE] = SuperLU_timer_() - t0; /* Transform the solution matrix X to a solution of the original system. */ if ( notran ) { if ( colequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < n; ++i) { Xmat[i + j*ldx] *= C[i]; } } } else { /* transposed system */ if ( rowequ ) { for (j = 0; j < nrhs; ++j) for (i = 0; i < A->nrow; ++i) { Xmat[i + j*ldx] *= R[i]; } } } } /* end if nrhs > 0 */ if ( options->ConditionNumber ) { /* The matrix is singular to working precision. */ /* if ( *rcond < dlamch_("E") && *info == 0) *info = A->ncol + 1; */ if ( *rcond < dmach("E") && *info == 0) *info = A->ncol + 1; } if ( nofact ) { ilu_dQuerySpace(L, U, mem_usage); Destroy_CompCol_Permuted(&AC); } if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
void c_fortran_dgssv_(int *iopt, int *n, int *nnz, int *nrhs, double *values, int *rowind, int *colptr, double *b, int *ldb, fptr *f_factors, /* a handle containing the address pointing to the factored matrices */ int *info) { /* * This routine can be called from Fortran. * * iopt (input) int * Specifies the operation: * = 1, performs LU decomposition for the first time * = 2, performs triangular solve * = 3, free all the storage in the end * * f_factors (input/output) fptr* * If iopt == 1, it is an output and contains the pointer pointing to * the structure of the factored matrices. * Otherwise, it it an input. * */ SuperMatrix A, AC, B; SuperMatrix *L, *U; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; /* column elimination tree */ SCformat *Lstore; NCformat *Ustore; int i, panel_size, permc_spec, relax; trans_t trans; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; factors_t *LUfactors; trans = NOTRANS; if ( *iopt == 1 ) { /* LU decomposition */ /* Set the default input options. */ set_default_options(&options); /* Initialize the statistics variables. */ StatInit(&stat); /* Adjust to 0-based indexing */ for (i = 0; i < *nnz; ++i) --rowind[i]; for (i = 0; i <= *n; ++i) --colptr[i]; dCreate_CompCol_Matrix(&A, *n, *n, *nnz, values, rowind, colptr, SLU_NC, SLU_D, SLU_GE); L = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); U = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); if ( !(perm_r = intMalloc(*n)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(*n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(etree = intMalloc(*n)) ) ABORT("Malloc fails for etree[]."); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = 0: natural ordering * permc_spec = 1: minimum degree on structure of A'*A * permc_spec = 2: minimum degree on structure of A'+A * permc_spec = 3: approximate minimum degree for unsymmetric matrices */ permc_spec = options.ColPerm; get_perm_c(permc_spec, &A, perm_c); sp_preorder(&options, &A, perm_c, etree, &AC); panel_size = sp_ienv(1); relax = sp_ienv(2); dgstrf(&options, &AC, relax, panel_size, etree, NULL, 0, perm_c, perm_r, L, U, &stat, info); if ( *info == 0 ) { Lstore = (SCformat *) L->Store; Ustore = (NCformat *) U->Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz); dQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("dgstrf() error returns INFO= %d\n", *info); if ( *info <= *n ) { /* factorization completes */ dQuerySpace(L, U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } /* Restore to 1-based indexing */ for (i = 0; i < *nnz; ++i) ++rowind[i]; for (i = 0; i <= *n; ++i) ++colptr[i]; /* Save the LU factors in the factors handle */ LUfactors = (factors_t*) SUPERLU_MALLOC(sizeof(factors_t)); LUfactors->L = L; LUfactors->U = U; LUfactors->perm_c = perm_c; LUfactors->perm_r = perm_r; *f_factors = (fptr) LUfactors; /* Free un-wanted storage */ SUPERLU_FREE(etree); Destroy_SuperMatrix_Store(&A); Destroy_CompCol_Permuted(&AC); StatFree(&stat); } else if ( *iopt == 2 ) { /* Triangular solve */ /* Initialize the statistics variables. */ StatInit(&stat); /* Extract the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; L = LUfactors->L; U = LUfactors->U; perm_c = LUfactors->perm_c; perm_r = LUfactors->perm_r; dCreate_Dense_Matrix(&B, *n, *nrhs, b, *ldb, SLU_DN, SLU_D, SLU_GE); /* Solve the system A*X=B, overwriting B with X. */ dgstrs (trans, L, U, perm_c, perm_r, &B, &stat, info); Destroy_SuperMatrix_Store(&B); StatFree(&stat); } else if ( *iopt == 3 ) { /* Free storage */ /* Free the LU factors in the factors handle */ LUfactors = (factors_t*) *f_factors; SUPERLU_FREE (LUfactors->perm_r); SUPERLU_FREE (LUfactors->perm_c); Destroy_SuperNode_Matrix(LUfactors->L); Destroy_CompCol_Matrix(LUfactors->U); SUPERLU_FREE (LUfactors->L); SUPERLU_FREE (LUfactors->U); SUPERLU_FREE (LUfactors); } else { fprintf(stderr,"Invalid iopt=%d passed to c_fortran_dgssv()\n",*iopt); exit(-1); } }
void dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, SuperLUStat_t *stat, int *info ) { DNformat *Bstore; SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int lwork = 0, *etree, i; /* Set default values for some parameters */ double drop_tol = 0.; int panel_size; /* panel size */ int relax; /* no of columns in a relaxed snodes */ int permc_spec; trans_t trans = NOTRANS; double *utime; double t; /* Temporary time */ /* Test the input parameters ... */ *info = 0; Bstore = B->Store; if ( options->Fact != DOFACT ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) || B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE ) *info = -7; if ( *info != 0 ) { i = -(*info); xerbla_("dgssv", &i); return; } utime = stat->utime; /* Convert A to SLU_NC format when necessary. */ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); trans = TRANS; } else { if ( A->Stype == SLU_NC ) AA = A; } t = SuperLU_timer_(); /* * Get column permutation vector perm_c[], according to permc_spec: * permc_spec = NATURAL: natural ordering * permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A * permc_spec = MMD_ATA: minimum degree on structure of A'*A * permc_spec = COLAMD: approximate minimum degree column ordering * permc_spec = MY_PERMC: the ordering already supplied in perm_c[] */ permc_spec = options->ColPerm; if ( permc_spec != MY_PERMC && options->Fact == DOFACT ) get_perm_c(permc_spec, AA, perm_c); utime[COLPERM] = SuperLU_timer_() - t; etree = intMalloc(A->ncol); t = SuperLU_timer_(); sp_preorder(options, AA, perm_c, etree, &AC); utime[ETREE] = SuperLU_timer_() - t; panel_size = sp_ienv(1); relax = sp_ienv(2); /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", relax, panel_size, sp_ienv(3), sp_ienv(4));*/ t = SuperLU_timer_(); /* Compute the LU factorization of A. */ dgstrf(options, &AC, drop_tol, relax, panel_size, etree, NULL, lwork, perm_c, perm_r, L, U, stat, info); utime[FACT] = SuperLU_timer_() - t; t = SuperLU_timer_(); if ( *info == 0 ) { /* Solve the system A*X=B, overwriting B with X. */ dgstrs (trans, L, U, perm_c, perm_r, B, stat, info); } utime[SOLVE] = SuperLU_timer_() - t; SUPERLU_FREE (etree); Destroy_CompCol_Permuted(&AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } }
void pdgssv(int nprocs, SuperMatrix *A, int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B, int *info ) { /* * -- SuperLU MT routine (version 2.0) -- * Lawrence Berkeley National Lab, Univ. of California Berkeley, * and Xerox Palo Alto Research Center. * September 10, 2007 * * Purpose * ======= * * PDGSSV solves the system of linear equations A*X=B, using the parallel * LU factorization routine PDGSTRF. It performs the following steps: * * 1. If A is stored column-wise (A->Stype = NC): * * 1.1. Permute the columns of A, forming A*Pc, where Pc is a * permutation matrix. * For more details of this step, see sp_preorder.c. * * 1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined * by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 1.3. Solve the system of equations A*X=B using the factored * form of A. * * 2. If A is stored row-wise (A->Stype = NR), apply the above algorithm * to the tranpose of A: * * 2.1. Permute columns of tranpose(A) (rows of A), * forming transpose(A)*Pc, where Pc is a permutation matrix. * For more details of this step, see sp_preorder.c. * * 2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr * determined by Gaussian elimination with partial pivoting. * L is unit lower triangular with offdiagonal entries * bounded by 1 in magnitude, and U is upper triangular. * * 2.3. Solve the system of equations A*X=B using the factored * form of A. * * See supermatrix.h for the definition of "SuperMatrix" structure. * * * Arguments * ========= * * nprocs (input) int * Number of processes (or threads) to be spawned and used to perform * the LU factorization by pdgstrf(). There is a single thread of * control to call pdgstrf(), and all threads spawned by pdgstrf() * are terminated before returning from pdgstrf(). * * A (input) SuperMatrix* * Matrix A in A*X=B, of dimension (A->nrow, A->ncol), where * A->nrow = A->ncol. Currently, the type of A can be: * Stype = NC or NR; Dtype = _D; Mtype = GE. In the future, * more general A will be handled. * * perm_c (input/output) int* * If A->Stype=NC, column permutation vector of size A->ncol, * which defines the permutation matrix Pc; perm_c[i] = j means * column i of A is in position j in A*Pc. * On exit, perm_c may be overwritten by the product of the input * perm_c and a permutation that postorders the elimination tree * of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree * is already in postorder. * * If A->Stype=NR, column permutation vector of size A->nrow * which describes permutation of columns of tranpose(A) * (rows of A) as described above. * * perm_r (output) int*, * If A->Stype=NR, row permutation vector of size A->nrow, * which defines the permutation matrix Pr, and is determined * by partial pivoting. perm_r[i] = j means row i of A is in * position j in Pr*A. * * If A->Stype=NR, permutation vector of size A->ncol, which * determines permutation of rows of transpose(A) * (columns of A) as described above. * * L (output) SuperMatrix* * The factor L from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Uses compressed row subscripts storage for supernodes, i.e., * L has types: Stype = SCP, Dtype = _D, Mtype = TRLU. * * U (output) SuperMatrix* * The factor U from the factorization * Pr*A*Pc=L*U (if A->Stype=NC) or * Pr*transpose(A)*Pc=L*U (if A->Stype=NR). * Use column-wise storage scheme, i.e., U has types: * Stype = NCP, Dtype = _D, Mtype = TRU. * * B (input/output) SuperMatrix* * B has types: Stype = DN, Dtype = _D, Mtype = GE. * On entry, the right hand side matrix. * On exit, the solution matrix if info = 0; * * info (output) int* * = 0: successful exit * > 0: if info = i, and i is * <= A->ncol: U(i,i) is exactly zero. The factorization has * been completed, but the factor U is exactly singular, * so the solution could not be computed. * > A->ncol: number of bytes allocated when memory allocation * failure occurred, plus A->ncol. * */ trans_t trans; NCformat *Astore; DNformat *Bstore; SuperMatrix *AA; /* A in NC format used by the factorization routine.*/ SuperMatrix AC; /* Matrix postmultiplied by Pc */ int i, n, panel_size, relax; fact_t fact; yes_no_t refact, usepr; double diag_pivot_thresh, drop_tol; void *work; int lwork; superlumt_options_t superlumt_options; Gstat_t Gstat; double t; /* Temporary time */ double *utime; flops_t *ops, flopcnt; /* ------------------------------------------------------------ Test the input parameters. ------------------------------------------------------------*/ Astore = A->Store; Bstore = B->Store; *info = 0; if ( nprocs <= 0 ) *info = -1; else if ( A->nrow != A->ncol || A->nrow < 0 || (A->Stype != SLU_NC && A->Stype != SLU_NR) || A->Dtype != SLU_D || A->Mtype != SLU_GE ) *info = -2; else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(1, A->nrow) )*info = -7; if ( *info != 0 ) { i = -(*info); xerbla_("pdgssv", &i); return; } #if 0 /* Use the best sequential code. if this part is commented out, we will use the parallel code run on one processor. */ if ( nprocs == 1 ) { return; } #endif fact = EQUILIBRATE; refact = NO; trans = NOTRANS; panel_size = sp_ienv(1); relax = sp_ienv(2); diag_pivot_thresh = 1.0; usepr = NO; drop_tol = 0.0; work = NULL; lwork = 0; /* ------------------------------------------------------------ Allocate storage and initialize statistics variables. ------------------------------------------------------------*/ n = A->ncol; StatAlloc(n, nprocs, panel_size, relax, &Gstat); StatInit(n, nprocs, &Gstat); utime = Gstat.utime; ops = Gstat.ops; /* ------------------------------------------------------------ Convert A to NC format when necessary. ------------------------------------------------------------*/ if ( A->Stype == SLU_NR ) { NRformat *Astore = A->Store; AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) ); dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, Astore->nzval, Astore->colind, Astore->rowptr, SLU_NC, A->Dtype, A->Mtype); trans = TRANS; } else if ( A->Stype == SLU_NC ) AA = A; /* ------------------------------------------------------------ Initialize the option structure superlumt_options using the user-input parameters; Apply perm_c to the columns of original A to form AC. ------------------------------------------------------------*/ pdgstrf_init(nprocs, fact, trans, refact, panel_size, relax, diag_pivot_thresh, usepr, drop_tol, perm_c, perm_r, work, lwork, AA, &AC, &superlumt_options, &Gstat); /* ------------------------------------------------------------ Compute the LU factorization of A. The following routine will create nprocs threads. ------------------------------------------------------------*/ pdgstrf(&superlumt_options, &AC, perm_r, L, U, &Gstat, info); flopcnt = 0; for (i = 0; i < nprocs; ++i) flopcnt += Gstat.procstat[i].fcops; ops[FACT] = flopcnt; #if ( PRNTlevel==1 ) printf("nprocs = %d, flops %e, Mflops %.2f\n", nprocs, flopcnt, flopcnt/utime[FACT]*1e-6); printf("Parameters: w %d, relax %d, maxsuper %d, rowblk %d, colblk %d\n", sp_ienv(1), sp_ienv(2), sp_ienv(3), sp_ienv(4), sp_ienv(5)); fflush(stdout); #endif /* ------------------------------------------------------------ Solve the system A*X=B, overwriting B with X. ------------------------------------------------------------*/ if ( *info == 0 ) { t = SuperLU_timer_(); dgstrs (trans, L, U, perm_r, perm_c, B, &Gstat, info); utime[SOLVE] = SuperLU_timer_() - t; ops[SOLVE] = ops[TRISOLVE]; } /* ------------------------------------------------------------ Deallocate storage after factorization. ------------------------------------------------------------*/ pxgstrf_finalize(&superlumt_options, &AC); if ( A->Stype == SLU_NR ) { Destroy_SuperMatrix_Store(AA); SUPERLU_FREE(AA); } /* ------------------------------------------------------------ Print timings, then deallocate statistic variables. ------------------------------------------------------------*/ #ifdef PROFILE { SCPformat *Lstore = (SCPformat *) L->Store; ParallelProfile(n, Lstore->nsuper+1, Gstat.num_panels, nprocs, &Gstat); } #endif //PrintStat(&Gstat); //FT Commented StatFree(&Gstat); }
int main(int argc, char *argv[]) { void dmatvec_mult(double alpha, double x[], double beta, double y[]); void dpsolve(int n, double x[], double y[]); extern int dfgmr( int n, void (*matvec_mult)(double, double [], double, double []), void (*psolve)(int n, double [], double[]), double *rhs, double *sol, double tol, int restrt, int *itmax, FILE *fits); extern int dfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; double *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; double *rhsb, *rhsx, *xact; double *work = NULL; double *R, *C; double u, rpg, rcond; double zero = 0.0; double one = 1.0; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; int restrt, iter, maxit, i; double resid; double *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 0.1; /* u=1.0 for complete factorization */ trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SILU; */ ilu_set_default_options(&options); /* Modify the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); dreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); dreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); dreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; dfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); if ( !(rhsb = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ dgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &mem_usage, &stat, &info); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("dgsisx(): info %d\n", info); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; /* Set the variables used by GMRES. */ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(b = doubleMalloc(m))) ABORT("Malloc fails for b[]."); if (!(x = doubleMalloc(n))) ABORT("Malloc fails for x[]."); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; double temp; extern double dnrm2_(int *, double [], int *); extern void daxpy_(int *, double *, double [], int *, double [], int *); /* Call GMRES. */ for (i = 0; i < n; i++) b[i] = rhsb[i]; for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); dfgmr(n, dmatvec_mult, dpsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = dnrm2_(&(Astore->nnz), (double *)((DNformat *)A.Store)->nzval, &i_1); nrmB = dnrm2_(&m, b, &i_1); sp_dgemv("N", -1.0, &A, x, 1, 1.0, b, 1); res = dnrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); /* Scale the solution back if equilibration was performed. */ if (*equed == 'C' || *equed == 'B') for (i = 0; i < n; i++) x[i] *= C[i]; for (i = 0; i < m; i++) { maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i])); } printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 0; }