int main(int argc, char *argv[]) { char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; float *a; int *asub, *xa; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; float *rhsb, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; /* Add more functionalities that the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ options.IterRefine = SLU_SINGLE; /* Perform single-precision refinement */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("SLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ sreadhb(&m, &n, &nnz, &a, &asub, &xa); sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); ldx = n; sGenXtrue(n, nrhs, xact, ldx); sFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using dgssvx. */ sgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("sgssvx(): info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ float *sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); if ( options.IterRefine != NOREFINE ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
int main(int argc, char *argv[]) { void smatvec_mult(float alpha, float x[], float beta, float y[]); void spsolve(int n, float x[], float y[]); extern int sfgmr( int n, void (*matvec_mult)(float, float [], float, float []), void (*psolve)(int n, float [], float[]), float *rhs, float *sol, double tol, int restrt, int *itmax, FILE *fits); extern int sfill_diag(int n, NCformat *Astore); char equed[1] = {'B'}; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; GlobalLU_t Glu; /* facilitate multiple factorizations with SamePattern_SameRowPerm */ float *a; int *asub, *xa; int *etree; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int nrhs, ldx, lwork, info, m, n, nnz; float *rhsb, *rhsx, *xact; float *work = NULL; float *R, *C; float u, rpg, rcond; float zero = 0.0; float one = 1.0; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; FILE *fp = stdin; int restrt, iter, maxit, i; double resid; float *x, *b; #ifdef DEBUG extern int num_drop_L, num_drop_U; #endif #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 0.1; //different from complete LU options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; options.RowPerm = LargeDiag; options.ILU_DropTol = 1e-4; options.ILU_FillTol = 1e-2; options.ILU_FillFactor = 10.0; options.ILU_DropRule = DROP_BASIC | DROP_AREA; options.ILU_Norm = INF_NORM; options.ILU_MILU = SILU; */ ilu_set_default_options(&options); /* Modify the defaults. */ options.PivotGrowth = YES; /* Compute reciprocal pivot growth */ options.ConditionNumber = YES;/* Compute reciprocal condition number */ if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) ABORT("Malloc fails for work[]."); } /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); sreadhb(fp, &m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); sreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); sreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } sCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE); Astore = A.Store; sfill_diag(n, Astore); printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); fflush(stdout); /* Generate the right-hand side */ if ( !(rhsb = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = floatMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); sCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE); sCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE); xact = floatMalloc(n * nrhs); ldx = n; sGenXtrue(n, nrhs, xact, ldx); sFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); info = 0; #ifdef DEBUG num_drop_L = 0; num_drop_U = 0; #endif /* Initialize the statistics variables. */ StatInit(&stat); /* Compute the incomplete factorization and compute the condition number and pivot growth using dgsisx. */ B.ncol = 0; /* not to perform triangular solution */ sgsisx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, &Glu, &mem_usage, &stat, &info); /* Set RHS for GMRES. */ if (!(b = floatMalloc(m))) ABORT("Malloc fails for b[]."); if (*equed == 'R' || *equed == 'B') { for (i = 0; i < n; ++i) b[i] = rhsb[i] * R[i]; } else { for (i = 0; i < m; i++) b[i] = rhsb[i]; } printf("sgsisx(): info %d, equed %c\n", info, equed[0]); if (info > 0 || rcond < 1e-8 || rpg > 1e8) printf("WARNING: This preconditioner might be unstable.\n"); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth == YES ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber == YES ) printf("Recip. condition number = %e\n", rcond); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("n(A) = %d, nnz(A) = %d\n", n, Astore->nnz); printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("Fill ratio: nnz(F)/nnz(A) = %.3f\n", ((double)(Lstore->nnz) + (double)(Ustore->nnz) - (double)n) / (double)Astore->nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); /* Set the global variables. */ GLOBAL_A = &A; GLOBAL_L = &L; GLOBAL_U = &U; GLOBAL_STAT = &stat; GLOBAL_PERM_C = perm_c; GLOBAL_PERM_R = perm_r; GLOBAL_OPTIONS = &options; GLOBAL_R = R; GLOBAL_C = C; GLOBAL_MEM_USAGE = &mem_usage; /* Set the options to do solve-only. */ options.Fact = FACTORED; options.PivotGrowth = NO; options.ConditionNumber = NO; /* Set the variables used by GMRES. */ restrt = SUPERLU_MIN(n / 3 + 1, 50); maxit = 1000; iter = maxit; resid = 1e-8; if (!(x = floatMalloc(n))) ABORT("Malloc fails for x[]."); if (info <= n + 1) { int i_1 = 1; double maxferr = 0.0, nrmA, nrmB, res, t; float temp; extern float snrm2_(int *, float [], int *); extern void saxpy_(int *, float *, float [], int *, float [], int *); /* Initial guess */ for (i = 0; i < n; i++) x[i] = zero; t = SuperLU_timer_(); /* Call GMRES */ sfgmr(n, smatvec_mult, spsolve, b, x, resid, restrt, &iter, stdout); t = SuperLU_timer_() - t; /* Output the result. */ nrmA = snrm2_(&(Astore->nnz), (float *)((DNformat *)A.Store)->nzval, &i_1); nrmB = snrm2_(&m, b, &i_1); sp_sgemv("N", -1.0, &A, x, 1, 1.0, b, 1); res = snrm2_(&m, b, &i_1); resid = res / nrmB; printf("||A||_F = %.1e, ||B||_2 = %.1e, ||B-A*X||_2 = %.1e, " "relres = %.1e\n", nrmA, nrmB, res, resid); if (iter >= maxit) { if (resid >= 1.0) iter = -180; else if (resid > 1e-8) iter = -111; } printf("iteration: %d\nresidual: %.1e\nGMRES time: %.2f seconds.\n", iter, resid, t); /* Scale the solution back if equilibration was performed. */ if (*equed == 'C' || *equed == 'B') for (i = 0; i < n; i++) x[i] *= C[i]; for (i = 0; i < m; i++) { maxferr = SUPERLU_MAX(maxferr, fabs(x[i] - xact[i])); } printf("||X-X_true||_oo = %.1e\n", maxferr); } #ifdef DEBUG printf("%d entries in L and %d entries in U dropped.\n", num_drop_L, num_drop_U); #endif fflush(stdout); if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork >= 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } SUPERLU_FREE(b); SUPERLU_FREE(x); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif return 0; }
int main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program CLINSOLX2. * * This example illustrates how to use CGSSVX to solve systems repeatedly * with the same sparsity pattern of matrix A. * In this case, the column permutation vector perm_c is computed once. * The following data structures will be reused in the subsequent call to * CGSSVX: perm_c, etree * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, A1, L, U; SuperMatrix B, B1, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; complex *a, *a1; int *asub, *xa, *asub1, *xa1; int *perm_r; /* row permutations from partial pivoting */ int *perm_c; /* column permutation vector */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, j, m, n, nnz; complex *rhsb, *rhsb1, *rhsx, *xact; float *R, *C; float *ferr, *berr; float u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("DLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ creadhb(&m, &n, &nnz, &a, &asub, &xa); if ( !(a1 = complexMalloc(nnz)) ) ABORT("Malloc fails for a1[]."); if ( !(asub1 = intMalloc(nnz)) ) ABORT("Malloc fails for asub1[]."); if ( !(xa1 = intMalloc(n+1)) ) ABORT("Malloc fails for xa1[]."); for (i = 0; i < nnz; ++i) { a1[i] = a[i]; asub1[i] = asub[i]; } for (i = 0; i < n+1; ++i) xa1[i] = xa[i]; cCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_C, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsb1 = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb1[]."); if ( !(rhsx = complexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); cCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_C, SLU_GE); cCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_C, SLU_GE); xact = complexMalloc(n * nrhs); ldx = n; cGenXtrue(n, nrhs, xact, ldx); cFillRHS(trans, nrhs, xact, ldx, &A, &B); for (j = 0; j < nrhs; ++j) for (i = 0; i < m; ++i) rhsb1[i+j*m] = rhsb[i+j*m]; if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(R = (float *) SUPERLU_MALLOC(A.nrow * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (float *) SUPERLU_MALLOC(A.ncol * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (float *) SUPERLU_MALLOC(nrhs * sizeof(float))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ------------------------------------------------------------ WE SOLVE THE LINEAR SYSTEM FOR THE FIRST TIME: AX = B ------------------------------------------------------------*/ cgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("First system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); Destroy_CompCol_Matrix(&A); Destroy_Dense_Matrix(&B); if ( lwork >= 0 ) { /* Deallocate storage associated with L and U. */ Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } /* ------------------------------------------------------------ NOW WE SOLVE ANOTHER LINEAR SYSTEM: A1*X = B1 ONLY THE SPARSITY PATTERN OF A1 IS THE SAME AS THAT OF A. ------------------------------------------------------------*/ options.Fact = SamePattern; StatInit(&stat); /* Initialize the statistics variables. */ cCreate_CompCol_Matrix(&A1, m, n, nnz, a1, asub1, xa1, SLU_NC, SLU_C, SLU_GE); cCreate_Dense_Matrix(&B1, m, nrhs, rhsb1, m, SLU_DN, SLU_C, SLU_GE); cgssvx(&options, &A1, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B1, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("\nSecond system: cgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ complex *sol = (complex*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A1); Destroy_Dense_Matrix(&B1); Destroy_Dense_Matrix(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
bool SuperLUSolver::Solve(SparseMatrixType& rA, VectorType& rX, VectorType& rB) { //std::cout << "matrix size in solver: " << rA.size1() << std::endl; //std::cout << "RHS size in solver SLU: " << rB.size() << std::endl; // typedef ublas::compressed_matrix<double, ublas::row_major, 0, // ublas::unbounded_array<int>, ublas::unbounded_array<double> > cm_t; //make a copy of the RHS VectorType rC = rB; superlu_options_t options; SuperLUStat_t stat; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); options.IterRefine = SLU_DOUBLE; // options.ColPerm = MMD_AT_PLUS_A; //Fill the SuperLU matrices SuperMatrix Aslu, B, L, U; //create a copy of the matrix int *index1_vector = new (std::nothrow) int[rA.index1_data().size()]; int *index2_vector = new (std::nothrow) int[rA.index2_data().size()]; // double *values_vector = new (std::nothrow) double[rA.value_data().size()]; for( int unsigned i = 0; i < rA.index1_data().size(); i++ ) index1_vector[i] = (int)rA.index1_data()[i]; for( unsigned int i = 0; i < rA.index2_data().size(); i++ ) index2_vector[i] = (int)rA.index2_data()[i]; /* for( unsigned int i = 0; i < rA.value_data().size(); i++ ) values_vector[i] = (double)rA.value_data()[i];*/ //create a copy of the rhs vector (it will be overwritten with the solution) /* double *b_vector = new (std::nothrow) double[rB.size()]; for( unsigned int i = 0; i < rB.size(); i++ ) b_vector[i] = rB[i];*/ /* dCreate_CompCol_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), values_vector, index2_vector, index1_vector, SLU_NR, SLU_D, SLU_GE );*/ //works also with dCreate_CompCol_Matrix dCreate_CompRow_Matrix (&Aslu, rA.size1(), rA.size2(), rA.nnz(), rA.value_data().begin(), index2_vector, //can not avoid a copy as ublas uses unsigned int internally index1_vector, //can not avoid a copy as ublas uses unsigned int internally SLU_NR, SLU_D, SLU_GE ); dCreate_Dense_Matrix (&B, rB.size(), 1,&rB[0],rB.size(),SLU_DN, SLU_D, SLU_GE); //allocate memory for permutation arrays int* perm_c; int* perm_r; if ( !(perm_c = intMalloc(rA.size1())) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(rA.size2())) ) ABORT("Malloc fails for perm_r[]."); //initialize container for statistical data StatInit(&stat); //call solver routine int info; dgssv(&options, &Aslu, perm_c, perm_r, &L, &U, &B, &stat, &info); //print output if (options.PrintStat) { StatPrint(&stat); } //resubstitution of results #pragma omp parallel for for(int i=0; i<static_cast<int>(rB.size()); i++ ) rX[i] = rB[i]; // B(i,0); //recover the RHS rB=rC; //deallocate memory used StatFree(&stat); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_SuperMatrix_Store(&Aslu); //note that by using the "store" function we will take care of deallocation ourselves Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); delete [] index1_vector; delete [] index2_vector; // delete [] b_vector; //CHECK WITH VALGRIND IF THIS IS NEEDED ...or if it is done by the lines above //deallocate tempory storage used for the matrix // if(b_vector!=NULL) delete [] index1_vector; // // if(b_vector!=NULL) delete [] index2_vector; // if(b_vector!=NULL) delete [] values_vector; // if(b_vector!=NULL) delete [] b_vector; return true; }
main(int argc, char *argv[]) { /* * Purpose * ======= * * The driver program ZLINSOLX1. * * This example illustrates how to use ZGSSVX to solve systems with the same * A but different right-hand side. * In this case, we factorize A only once in the first call to DGSSVX, * and reuse the following data structures in the subsequent call to ZGSSVX: * perm_c, perm_r, R, C, L, U. * */ char equed[1]; yes_no_t equil; trans_t trans; SuperMatrix A, L, U; SuperMatrix B, X; NCformat *Astore; NCformat *Ustore; SCformat *Lstore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ int *etree; void *work; int info, lwork, nrhs, ldx; int i, m, n, nnz; doublecomplex *rhsb, *rhsx, *xact; double *R, *C; double *ferr, *berr; double u, rpg, rcond; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; extern void parse_command_line(); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default values for options argument: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Can use command line input to modify the defaults. */ parse_command_line(argc, argv, &lwork, &u, &equil, &trans); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; if ( lwork > 0 ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT("ZLINSOLX: cannot allocate work[]"); } } /* Read matrix A from a file in Harwell-Boeing format.*/ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); if ( !(rhsb = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsb[]."); if ( !(rhsx = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhsx[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhsb, m, SLU_DN, SLU_Z, SLU_GE); zCreate_Dense_Matrix(&X, m, nrhs, rhsx, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(trans, nrhs, xact, ldx, &A, &B); if ( !(etree = intMalloc(n)) ) ABORT("Malloc fails for etree[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(R = (double *) SUPERLU_MALLOC(A.nrow * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for R[]."); if ( !(C = (double *) SUPERLU_MALLOC(A.ncol * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for C[]."); if ( !(ferr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for ferr[]."); if ( !(berr = (double *) SUPERLU_MALLOC(nrhs * sizeof(double))) ) ABORT("SUPERLU_MALLOC fails for berr[]."); /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("LU factorization: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); /* ------------------------------------------------------------ NOW WE SOLVE THE LINEAR SYSTEM USING THE FACTORED FORM OF A. ------------------------------------------------------------*/ options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ B.ncol = nrhs; /* Set the number of right-hand side */ /* Initialize the statistics variables. */ StatInit(&stat); zgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); printf("Triangular solve: zgssvx() returns info %d\n", info); if ( info == 0 || info == n+1 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) X.Store)->nzval; if ( options.IterRefine ) { printf("Iterative Refinement:\n"); printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < nrhs; ++i) printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } fflush(stdout); } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhsb); SUPERLU_FREE (rhsx); SUPERLU_FREE (xact); SUPERLU_FREE (etree); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); SUPERLU_FREE (R); SUPERLU_FREE (C); SUPERLU_FREE (ferr); SUPERLU_FREE (berr); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperMatrix_Store(&X); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; double *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; double *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; #if 1 /* Read matrix A from a file in Harwell-Boeing format.*/ if (argc < 2) { printf("Usage:\n%s [OPTION] < [INPUT] > [OUTPUT]\nOPTION:\n" "-h -hb:\n\t[INPUT] is a Harwell-Boeing format matrix.\n" "-r -rb:\n\t[INPUT] is a Rutherford-Boeing format matrix.\n" "-t -triplet:\n\t[INPUT] is a triplet format matrix.\n", argv[0]); return 0; } else { switch (argv[1][1]) { case 'H': case 'h': printf("Input a Harwell-Boeing format matrix:\n"); dreadhb(&m, &n, &nnz, &a, &asub, &xa); break; case 'R': case 'r': printf("Input a Rutherford-Boeing format matrix:\n"); dreadrb(&m, &n, &nnz, &a, &asub, &xa); break; case 'T': case 't': printf("Input a triplet format matrix:\n"); dreadtriple(&m, &n, &nnz, &a, &asub, &xa); break; default: printf("Unrecognized format.\n"); return 0; } } #else /* Read the matrix in Harwell-Boeing format. */ dreadhb(&m, &n, &nnz, &a, &asub, &xa); #endif dCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doubleMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); dCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_D, SLU_GE); xact = doubleMalloc(n * nrhs); ldx = n; dGenXtrue(n, nrhs, xact, ldx); dFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); dgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ double *sol = (double*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ dinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("FILL ratio = %.1f\n", (float)(Lstore->nnz + Ustore->nnz - n)/nnz); dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } else { printf("dgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ dQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_S3 solves a sparse system read from a file using SGSSVX. Discussion: The sparse matrix is stored in a file using the Harwell-Boeing sparse matrix format. The file should be assigned to the standard input of this program. For instance, if the matrix is stored in the file "g10_rua.txt", the execution command might be: super_lu_s3 < g10_rua.txt Modified: 25 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. Local parameters: SuperMatrix L, the computed L factor. int *perm_c, the column permutation vector. int *perm_r, the row permutations from partial pivoting. SuperMatrix U, the computed U factor. */ { SuperMatrix A; NCformat *Astore; float *a; int *asub; SuperMatrix B; float *berr; float *C; char equed[1]; yes_no_t equil; int *etree; float *ferr; int i; int info; SuperMatrix L; int ldx; SCformat *Lstore; int lwork; int m; mem_usage_t mem_usage; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; float *R; float rcond; float *rhsb; float *rhsx; float rpg; float *sol; SuperLUStat_t stat; trans_t trans; SuperMatrix U; float u; NCformat *Ustore; void *work; SuperMatrix X; int *xa; float *xact; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Read a sparse matrix A from standard input,\n"); printf ( " stored in Harwell-Boeing Sparse Matrix format.\n" ); printf ( "\n" ); printf ( " Solve a linear system A * X = B using SGSSVX.\n" ); /* Defaults */ lwork = 0; nrhs = 1; equil = YES; u = 1.0; trans = NOTRANS; /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options ( &options ); /* Can use command line input to modify the defaults. */ parse_command_line ( argc, argv, &lwork, &u, &equil, &trans ); options.Equil = equil; options.DiagPivotThresh = u; options.Trans = trans; printf ( "\n" ); printf ( " Length of work array LWORK = %d\n", lwork ); printf ( " Equilibration option EQUIL = %d\n", equil ); printf ( " Diagonal pivot threshhold value U = %f\n", u ); printf ( " Tranpose option TRANS = %d\n", trans ); /* Add more functionalities that the defaults. Compute reciprocal pivot growth */ options.PivotGrowth = YES; /* Compute reciprocal condition number */ options.ConditionNumber = YES; /* Perform single-precision refinement */ options.IterRefine = SINGLE; if ( 0 < lwork ) { work = SUPERLU_MALLOC(lwork); if ( !work ) { ABORT ( "SUPERLU_MALLOC cannot allocate work[]" ); } } /* Read matrix A from a file in Harwell-Boeing format. */ sreadhb ( &m, &n, &nnz, &a, &asub, &xa ); /* Create storage for a compressed column matrix. */ sCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE ); Astore = A.Store; printf ( "\n" ); printf ( " Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz ); rhsb = floatMalloc ( m * nrhs ); if ( !rhsb ) { ABORT ( "Malloc fails for rhsb[]." ); } rhsx = floatMalloc ( m * nrhs ); if ( !rhsx ) { ABORT ( "Malloc fails for rhsx[]." ); } sCreate_Dense_Matrix ( &B, m, nrhs, rhsb, m, SLU_DN, SLU_S, SLU_GE ); sCreate_Dense_Matrix ( &X, m, nrhs, rhsx, m, SLU_DN, SLU_S, SLU_GE ); xact = floatMalloc ( n * nrhs ); if ( !xact ) { ABORT ( "SUPERLU_MALLOC cannot allocate xact[]" ); } ldx = n; sGenXtrue ( n, nrhs, xact, ldx ); sFillRHS ( trans, nrhs, xact, ldx, &A, &B ); etree = intMalloc ( n ); if ( !etree ) { ABORT ( "Malloc fails for etree[]." ); } perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } R = (float *) SUPERLU_MALLOC ( A.nrow * sizeof(float) ); if ( !R ) { ABORT ( "SUPERLU_MALLOC fails for R[]." ); } C = (float *) SUPERLU_MALLOC ( A.ncol * sizeof(float) ); if ( !C ) { ABORT ( "SUPERLU_MALLOC fails for C[]." ); } ferr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !ferr ) { ABORT ( "SUPERLU_MALLOC fails for ferr[]." ); } berr = (float *) SUPERLU_MALLOC ( nrhs * sizeof(float) ); if ( !berr ) { ABORT ( "SUPERLU_MALLOC fails for berr[]." ); } /* Initialize the statistics variables. */ StatInit(&stat); /* Solve the system and compute the condition number and error bounds using SGSSVX. */ sgssvx ( &options, &A, perm_c, perm_r, etree, equed, R, C, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info ); printf ( "\n" ); printf ( " SGSSVX returns INFO = %d\n", info ); if ( info == 0 || info == n+1 ) { sol = (float*) ((DNformat*) X.Store)->nzval; if ( options.PivotGrowth == YES ) { printf ( "\n" ); printf ( " Reciprocal pivot growth = %e\n", rpg); } if ( options.ConditionNumber == YES ) { printf ( "\n" ); printf ( " Reciprocal condition number = %e\n", rcond); } if ( options.IterRefine != NOREFINE ) { printf ( "\n" ); printf ( " Iterative Refinement:\n"); printf ( "%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for ( i = 0; i < nrhs; i++ ) { printf ( "%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr[i], berr[i]); } } Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf ( "\n" ); printf ( " Number of nonzeros in factor L = %d\n", Lstore->nnz ); printf ( " Number of nonzeros in factor U = %d\n", Ustore->nnz ); printf ( " Number of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n ); printf ( "\n" ); printf ( " L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions ); fflush ( stdout ); } else if ( info > 0 && lwork == -1 ) { printf ( "\n" ); printf ( " Estimated memory: %d bytes\n", info - n ); } if ( options.PrintStat ) { StatPrint ( &stat ); } StatFree ( &stat ); SUPERLU_FREE ( rhsb ); SUPERLU_FREE ( rhsx ); SUPERLU_FREE ( xact ); SUPERLU_FREE ( etree ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); SUPERLU_FREE ( R ); SUPERLU_FREE ( C ); SUPERLU_FREE ( ferr ); SUPERLU_FREE ( berr ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperMatrix_Store ( &X ); if ( 0 <= lwork ) { Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); } /* Say goodbye. */ printf ( "\n" ); printf ( "SUPER_LU_S3:\n" ); printf ( " Normal end of execution.\n"); return 0; }
main(int argc, char *argv[]) { SuperMatrix A; NCformat *Astore; doublecomplex *a; int *asub, *xa; int *perm_c; /* column permutation vector */ int *perm_r; /* row permutations from partial pivoting */ SuperMatrix L; /* factor L */ SCformat *Lstore; SuperMatrix U; /* factor U */ NCformat *Ustore; SuperMatrix B; int nrhs, ldx, info, m, n, nnz; doublecomplex *xact, *rhs; mem_usage_t mem_usage; superlu_options_t options; SuperLUStat_t stat; #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Enter main()"); #endif /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options(&options); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; /* Read the matrix in Harwell-Boeing format. */ zreadhb(&m, &n, &nnz, &a, &asub, &xa); zCreate_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, SLU_Z, SLU_GE); Astore = A.Store; printf("Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz); nrhs = 1; if ( !(rhs = doublecomplexMalloc(m * nrhs)) ) ABORT("Malloc fails for rhs[]."); zCreate_Dense_Matrix(&B, m, nrhs, rhs, m, SLU_DN, SLU_Z, SLU_GE); xact = doublecomplexMalloc(n * nrhs); ldx = n; zGenXtrue(n, nrhs, xact, ldx); zFillRHS(options.Trans, nrhs, xact, ldx, &A, &B); if ( !(perm_c = intMalloc(n)) ) ABORT("Malloc fails for perm_c[]."); if ( !(perm_r = intMalloc(m)) ) ABORT("Malloc fails for perm_r[]."); /* Initialize the statistics variables. */ StatInit(&stat); zgssv(&options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info); if ( info == 0 ) { /* This is how you could access the solution matrix. */ doublecomplex *sol = (doublecomplex*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ zinf_norm_error(nrhs, &B, xact); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } else { printf("zgssv() error returns INFO= %d\n", info); if ( info <= n ) { /* factorization completes */ zQuerySpace(&L, &U, &mem_usage); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); SUPERLU_FREE (rhs); SUPERLU_FREE (xact); SUPERLU_FREE (perm_r); SUPERLU_FREE (perm_c); Destroy_CompCol_Matrix(&A); Destroy_SuperMatrix_Store(&B); Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&U); #if ( DEBUGlevel>=1 ) CHECK_MALLOC("Exit main()"); #endif }
PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x) { Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; PetscScalar *barray,*xarray; PetscErrorCode ierr; PetscInt info,i,n=x->map->n; PetscReal ferr,berr; PetscFunctionBegin; if ( lu->lwork == -1 ) { PetscFunctionReturn(0); } lu->B.ncol = 1; /* Set the number of right-hand side */ if (lu->options.Equil && !lu->rhs_dup){ /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */ ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr); } if (lu->options.Equil){ /* Copy b into rsh_dup */ ierr = VecGetArray(b,&barray);CHKERRQ(ierr); ierr = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr); ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr); barray = lu->rhs_dup; } else { ierr = VecGetArray(b,&barray);CHKERRQ(ierr); } ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); #if defined(PETSC_USE_COMPLEX) ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray; ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray; #else ((DNformat*)lu->B.Store)->nzval = barray; ((DNformat*)lu->X.Store)->nzval = xarray; #endif lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ if (A->factortype == MAT_FACTOR_LU){ #if defined(PETSC_USE_COMPLEX) zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &info); #else dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &info); #endif } else if (A->factortype == MAT_FACTOR_ILU){ #if defined(PETSC_USE_COMPLEX) zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &info); #else dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &info); #endif } else { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); } if (!lu->options.Equil){ ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr); } ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); if ( !info || info == lu->A.ncol+1 ) { if ( lu->options.IterRefine ) { ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n"); ierr = PetscPrintf(PETSC_COMM_SELF," %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); for (i = 0; i < 1; ++i) ierr = PetscPrintf(PETSC_COMM_SELF," %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr); } } else if ( info > 0 ){ if ( lu->lwork == -1 ) { ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", info - lu->A.ncol); } else { ierr = PetscPrintf(PETSC_COMM_SELF," Warning: gssvx() returns info %D\n",info); } } else if (info < 0){ SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info); } if ( lu->options.PrintStat ) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n"); StatPrint(&lu->stat); } PetscFunctionReturn(0); }
SolveSuperLU (const MatriceMorse<R> &AA, int strategy, double ttgv, double epsilon, double pivot, double pivot_sym, string ¶m_char, KN<long> pperm_r, KN<long> pperm_c): eps(epsilon), epsr(0), tgv(ttgv), etree(0), string_option(param_char), perm_r(pperm_r), perm_c(pperm_c), RR(0), CC(0), tol_pivot_sym(pivot_sym), tol_pivot(pivot) { SuperMatrix B, X; SuperLUStat_t stat; void *work = 0; int info, lwork = 0/*, nrhs = 1*/; int i; double ferr[1]; double berr[1]; double rpg, rcond; R *bb; R *xx; A.Store = 0; B.Store = 0; X.Store = 0; L.Store = 0; U.Store = 0; int status; n = AA.n; m = AA.m; nnz = AA.nbcoef; arow = AA.a; asubrow = AA.cl; xarow = AA.lg; /* FreeFem++ use Morse Format */ // FFCS - "this->" required by g++ 4.7 this->CompRow_to_CompCol(m, n, nnz, arow, asubrow, xarow, &a, &asub, &xa); /* Defaults */ lwork = 0; // nrhs = 0; /* Set the default values for options argument: * options.Fact = DOFACT; * options.Equil = YES; * options.ColPerm = COLAMD; * options.DiagPivotThresh = 1.0; * options.Trans = NOTRANS; * options.IterRefine = NOREFINE; * options.SymmetricMode = NO; * options.PivotGrowth = NO; * options.ConditionNumber = NO; * options.PrintStat = YES; */ set_default_options(&options); printf(".. default options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); if (!string_option.empty()) {read_options_freefem(string_option, &options);} printf(".. options:\n"); printf("\tFact\t %8d\n", options.Fact); printf("\tEquil\t %8d\n", options.Equil); printf("\tColPerm\t %8d\n", options.ColPerm); printf("\tDiagPivotThresh %8.4f\n", options.DiagPivotThresh); printf("\tTrans\t %8d\n", options.Trans); printf("\tIterRefine\t%4d\n", options.IterRefine); printf("\tSymmetricMode\t%4d\n", options.SymmetricMode); printf("\tPivotGrowth\t%4d\n", options.PivotGrowth); printf("\tConditionNumber\t%4d\n", options.ConditionNumber); printf("..\n"); Dtype_t R_SLU = SuperLUDriver<R>::R_SLU_T(); // FFCS - "this->" required by g++ 4.7 this->Create_CompCol_Matrix(&A, m, n, nnz, a, asub, xa, SLU_NC, R_SLU, SLU_GE); this->Create_Dense_Matrix(&B, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); this->Create_Dense_Matrix(&X, m, 0, (R *)0, m, SLU_DN, R_SLU, SLU_GE); if (etree.size() == 0) {etree.resize(n);} if (perm_r.size() == 0) {perm_r.resize(n);} if (perm_c.size() == 0) {perm_c.resize(n);} if (!(RR = new double[n])) { ABORT("SUPERLU_MALLOC fails for R[]."); } for (int ii = 0; ii < n; ii++) { RR[ii] = 1.; } if (!(CC = new double[m])) { ABORT("SUPERLU_MALLOC fails for C[]."); } for (int ii = 0; ii < n; ii++) { CC[ii] = 1.; } ferr[0] = 0; berr[0] = 0; /* Initialize the statistics variables. */ StatInit(&stat); /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ SuperLUDriver<R>::gssvx(&options, &A, perm_c, perm_r, etree, equed, RR, CC, &L, &U, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &Glu, &mem_usage, &stat, &info); if (verbosity > 2) { printf("LU factorization: dgssvx() returns info %d\n", info); } if (verbosity > 3) { if (info == 0 || info == n + 1) { if (options.PivotGrowth) {printf("Recip. pivot growth = %e\n", rpg);} if (options.ConditionNumber) { printf("Recip. condition number = %e\n", rcond); } Lstore = (SCformat *)L.Store; Ustore = (NCformat *)U.Store; printf("No of nonzeros in factor L = %d\n", Lstore->nnz); printf("No of nonzeros in factor U = %d\n", Ustore->nnz); printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu / 1e6, mem_usage.total_needed / 1e6, stat.expansions ); fflush(stdout); } else if (info > 0 && lwork == -1) { printf("** Estimated memory: %d bytes\n", info - n); } } if (verbosity > 5) {StatPrint(&stat);} StatFree(&stat); if (B.Store) {Destroy_SuperMatrix_Store(&B);} if (X.Store) {Destroy_SuperMatrix_Store(&X);} options.Fact = FACTORED;/* Indicate the factored form of A is supplied. */ }
PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) { Mat_SuperLU *lu = (Mat_SuperLU*)F->spptr; Mat_SeqAIJ *aa; PetscErrorCode ierr; PetscInt sinfo; PetscReal ferr, berr; NCformat *Ustore; SCformat *Lstore; PetscFunctionBegin; if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */ lu->options.Fact = SamePattern; /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ Destroy_SuperMatrix_Store(&lu->A); if (lu->options.Equil){ ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr); } if ( lu->lwork >= 0 ) { Destroy_SuperNode_Matrix(&lu->L); Destroy_CompCol_Matrix(&lu->U); lu->options.Fact = SamePattern; } } /* Create the SuperMatrix for lu->A=A^T: Since SuperLU likes column-oriented matrices,we pass it the transpose, and then solve A^T X = B in MatSolve(). */ if (lu->options.Equil){ aa = (Mat_SeqAIJ*)(lu->A_dup)->data; } else { aa = (Mat_SeqAIJ*)(A)->data; } #if defined(PETSC_USE_COMPLEX) zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i, SLU_NC,SLU_Z,SLU_GE); #else dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i, SLU_NC,SLU_D,SLU_GE); #endif /* Numerical factorization */ lu->B.ncol = 0; /* Indicate not to solve the system */ if (F->factortype == MAT_FACTOR_LU){ #if defined(PETSC_USE_COMPLEX) zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #else dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->mem_usage, &lu->stat, &sinfo); #endif } else if (F->factortype == MAT_FACTOR_ILU){ /* Compute the incomplete factorization, condition number and pivot growth */ #if defined(PETSC_USE_COMPLEX) zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #else dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->mem_usage, &lu->stat, &sinfo); #endif } else { SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); } if ( !sinfo || sinfo == lu->A.ncol+1 ) { if ( lu->options.PivotGrowth ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg); if ( lu->options.ConditionNumber ) ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond); } else if ( sinfo > 0 ){ if ( lu->lwork == -1 ) { ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol); } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); } else { /* sinfo < 0 */ SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo); } if ( lu->options.PrintStat ) { ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n"); StatPrint(&lu->stat); Lstore = (SCformat *) lu->L.Store; Ustore = (NCformat *) lu->U.Store; ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %D\n", Lstore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %D\n", Ustore->nnz); ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); } lu->flg = SAME_NONZERO_PATTERN; F->ops->solve = MatSolve_SuperLU; F->ops->solvetranspose = MatSolveTranspose_SuperLU; F->ops->matsolve = MatMatSolve_SuperLU; PetscFunctionReturn(0); }
bool SparseMatrix::solveSLU (Vector& B) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); // Invoke the simple driver pdgssv(numThreads, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &ierr); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; Destroy_SuperMatrix_Store(&Bmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side/solution vector(s) size_t nrhs = B.size() / nrow; SuperMatrix Bmat; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); SuperLUStat_t stat; StatInit(&stat); // Invoke the simple driver dgssv(slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->L, &slu->U, &Bmat, &stat, &ierr); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else factored = true; if (printSLUstat) StatPrint(&stat); StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
bool SparseMatrix::solveSLUx (Vector& B, Real* rcond) { int ierr = ncol+1; if (!factored) this->optimiseSLU(); #ifdef HAS_SUPERLU_MT if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(numThreads); slu->equed = NOEQUIL; slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; slu->opts->etree = new int[ncol]; slu->opts->colcnt_h = new int[ncol]; slu->opts->part_super_h = new int[ncol]; memset(slu->opts->colcnt_h, 0, ncol*sizeof(int)); memset(slu->opts->part_super_h, 0, ncol*sizeof(int)); memset(slu->opts->etree, 0, ncol*sizeof(int)); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); // Get column permutation vector perm_c[], according to permc_spec: // permc_spec = 0: natural ordering // permc_spec = 1: minimum degree ordering on structure of A'*A // permc_spec = 2: minimum degree ordering on structure of A'+A // permc_spec = 3: approximate minimum degree for unsymmetric matrices int permc_spec = 1; get_perm_c(permc_spec, &slu->A, slu->perm_c); } else if (factored) slu->opts->fact = FACTORED; // Re-use previous factorization else slu->opts->refact = YES; // Re-use previous ordering // Create right-hand-side and solution vector(s) Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); Real ferr[nrhs], berr[nrhs]; superlu_memusage_t mem_usage; // Invoke the expert driver pdgssvx(numThreads, slu->opts, &slu->A, slu->perm_c, slu->perm_r, &slu->equed, slu->R, slu->C, &slu->L, &slu->U, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &ierr); B.swap(X); if (ierr > 0) std::cerr <<"SuperLU_MT Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #elif defined(HAS_SUPERLU) if (!slu) { // Create a new SuperLU matrix slu = new SuperLUdata(1); slu->perm_c = new int[ncol]; slu->perm_r = new int[nrow]; slu->etree = new int[ncol]; slu->C = new Real[ncol]; slu->R = new Real[nrow]; dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } else if (factored) slu->opts->Fact = FACTORED; // Re-use previous factorization else { Destroy_SuperMatrix_Store(&slu->A); Destroy_SuperNode_Matrix(&slu->L); Destroy_CompCol_Matrix(&slu->U); dCreate_CompCol_Matrix(&slu->A, nrow, ncol, this->size(), &A.front(), &JA.front(), &IA.front(), SLU_NC, SLU_D, SLU_GE); } // Create right-hand-side vector and solution vector Vector X(B.size()); SuperMatrix Bmat, Xmat; const size_t nrhs = B.size() / nrow; dCreate_Dense_Matrix(&Bmat, nrow, nrhs, B.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); dCreate_Dense_Matrix(&Xmat, nrow, nrhs, X.ptr(), nrow, SLU_DN, SLU_D, SLU_GE); slu->opts->ConditionNumber = printSLUstat || rcond ? YES : NO; slu->opts->PivotGrowth = printSLUstat ? YES : NO; void* work = 0; int lwork = 0; Real ferr[nrhs], berr[nrhs]; mem_usage_t mem_usage; SuperLUStat_t stat; StatInit(&stat); // Invoke the expert driver #if SUPERLU_VERSION == 5 GlobalLU_t Glu; dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &Glu, &mem_usage, &stat, &ierr); #else dgssvx(slu->opts, &slu->A, slu->perm_c, slu->perm_r, slu->etree, slu->equed, slu->R, slu->C, &slu->L, &slu->U, work, lwork, &Bmat, &Xmat, &slu->rpg, &slu->rcond, ferr, berr, &mem_usage, &stat, &ierr); #endif B.swap(X); if (ierr > 0) std::cerr <<"SuperLU Failure "<< ierr << std::endl; else if (!factored) { factored = true; if (rcond) *rcond = slu->rcond; } if (printSLUstat) { StatPrint(&stat); IFEM::cout <<"Reciprocal condition number = "<< slu->rcond <<"\nReciprocal pivot growth = "<< slu->rpg << std::endl; } StatFree(&stat); Destroy_SuperMatrix_Store(&Bmat); Destroy_SuperMatrix_Store(&Xmat); #else std::cerr <<"SparseMatrix::solve: SuperLU solver not available"<< std::endl; #endif return ierr == 0; }
void RKWidget::step ( const size_t nStep ) { DNformat *Bstore; DNformat *Xstore; double temp; if(unstable) return; samset = false; if( dirty ) { if(aexist) { Destroy_CompCol_Matrix(&A); if ( lwork == 0 ) { Destroy_SuperNode_Matrix(&L); Destroy_CompCol_Matrix(&Up); } else if ( lwork > 0 ) { SUPERLU_FREE(work); } // these may be freed in dgssvx or Destroy_CompCol_Matrix I think aexist = false; } a = new double[nvar*N_]; xa = new int[N_+1]; asub = new int[nvar*N_]; updateCoef(method); if(nblock == 1) { // load with coef[] ??? fillA(); } else if (nup+ndn == 0) { // solve seperate independent blocks??? } else { // load block system blockFillA(); } dCreate_CompCol_Matrix(&A, N_, N_, nnz, a, asub, xa, SLU_NC, SLU_D, SLU_GE); aexist = true; /* Initialize the statistics variables. */ StatInit(&stat); dPrint_CompCol_Matrix("A matrix", &A); options.Fact = DOFACT; //options.ColPerm=NATURAL; options.ColPerm=COLAMD; options.PivotGrowth = NO; options.ConditionNumber = NO; /* ONLY PERFORM THE LU DECOMPOSITION */ B.ncol = 0; /* Indicate not to solve the system */ dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &Up, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); //dPrint_CompCol_Matrix("A matrix", &A); printf("LU factorization: dgssvx() returns info %d\n", info); if ( info == 0 || info == N_+1 ) { if ( options.PivotGrowth ) printf("Recip. pivot growth = %e\n", rpg); if ( options.ConditionNumber ) printf("Recip. condition number = %e\n", rcond); printf("L\\U_ MB %.3f\ttotal MB needed %.3f\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6); fflush(stdout); options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ B.ncol = 1; dirty = false; } else if ( info > 0 && lwork == -1 ) { printf("** Estimated memory: %d bytes\n", info - n); } if ( options.PrintStat ) StatPrint(&stat); StatFree(&stat); } for ( size_t n = 0; n < nStep; n++ ) { for( int ns = 0; ns < nStage; ns++ ) { ///set B matrix Bstore= (DNformat *)B.Store; rhsb=(double*)Bstore->nzval; if (nup+ndn == 0) { // solve seperate independent blocks??? std::cout << "Discontinuous Galerkin Not Implimented\n"; return; } else { fillB(ns); } ///solve factored system StatInit(&stat); options.Fact = FACTORED; dgssvx(&options, &A, perm_c, perm_r, etree, equed, R, C, &L, &Up, work, lwork, &B, &X, &rpg, &rcond, ferr, berr, &mem_usage, &stat, &info); //if( n == 1 ) printf("Triangular solve: dgssvx() returns info %d\n", info); ///update U_ if ( info == 0 || info == N_+1 ) { /* This is how you could access the solution matrix. */ Xstore = (DNformat *)X.Store; rhsx = (double*)(Xstore->nzval); for ( size_t i = 0; i < N_ ; i++ ) { if(rhsx[i]> 1e16) unstable = true; b_k[i+N_*ns]=rhsx[i]; } } else { std::cout << "ERROR: Matrix Solution Failed info = " << info << std::endl; } StatFree(&stat); } for(int j=0 ; j<nStage; j++) { temp=b_b[j]; if( temp != 0 ) { for(size_t i = 0; i < N_ ; i++ ) { U_[i] += temp*b_k[i+j*N_]; } } } cStep++; totCFL += CFL; } }
int main ( int argc, char *argv[] ) /**********************************************************************/ /* Purpose: SUPER_LU_S2 solves a symmetric sparse system read from a file. Discussion: The sparse matrix is stored in a file using the Harwell-Boeing sparse matrix format. The file should be assigned to the standard input of this program. For instance, if the matrix is stored in the file "g10_rua.txt", the execution command might be: super_lu_s2 < g10_rua.txt Modified: 25 April 2004 Reference: James Demmel, John Gilbert, Xiaoye Li, SuperLU Users's Guide, Sections 1 and 2. Local parameters: SuperMatrix L, the computed L factor. int *perm_c, the column permutation vector. int *perm_r, the row permutations from partial pivoting. SuperMatrix U, the computed U factor. */ { SuperMatrix A; NCformat *Astore; float *a; int *asub; SuperMatrix B; int info; SuperMatrix L; int ldx; SCformat *Lstore; int m; mem_usage_t mem_usage; int n; int nnz; int nrhs; superlu_options_t options; int *perm_c; int *perm_r; float *rhs; float *sol; SuperLUStat_t stat; SuperMatrix U; NCformat *Ustore; int *xa; float *xact; /* Say hello. */ printf ( "\n" ); printf ( "SUPER_LU_S2:\n" ); printf ( " Read a symmetric sparse matrix A from standard input,\n"); printf ( " stored in Harwell-Boeing Sparse Matrix format.\n" ); printf ( "\n" ); printf ( " Solve a linear system A * X = B.\n" ); /* Set the default input options: options.Fact = DOFACT; options.Equil = YES; options.ColPerm = COLAMD; options.DiagPivotThresh = 1.0; options.Trans = NOTRANS; options.IterRefine = NOREFINE; options.SymmetricMode = NO; options.PivotGrowth = NO; options.ConditionNumber = NO; options.PrintStat = YES; */ set_default_options ( &options ); /* Now we modify the default options to use the symmetric mode. */ options.SymmetricMode = YES; options.ColPerm = MMD_AT_PLUS_A; options.DiagPivotThresh = 0.001; /* Read the matrix in Harwell-Boeing format. */ sreadhb ( &m, &n, &nnz, &a, &asub, &xa ); /* Create storage for a compressed column matrix. */ sCreate_CompCol_Matrix ( &A, m, n, nnz, a, asub, xa, SLU_NC, SLU_S, SLU_GE ); Astore = A.Store; printf ( "\n" ); printf ( " Dimension %dx%d; # nonzeros %d\n", A.nrow, A.ncol, Astore->nnz ); /* Set up the right hand side. */ nrhs = 1; rhs = floatMalloc ( m * nrhs ); if ( !rhs ) { ABORT ( " Malloc fails for rhs[]." ); } sCreate_Dense_Matrix ( &B, m, nrhs, rhs, m, SLU_DN, SLU_S, SLU_GE ); xact = floatMalloc ( n * nrhs ); if ( !xact ) { ABORT ( " Malloc fails for rhs[]." ); } ldx = n; sGenXtrue ( n, nrhs, xact, ldx ); sFillRHS ( options.Trans, nrhs, xact, ldx, &A, &B ); perm_c = intMalloc ( n ); if ( !perm_c ) { ABORT ( "Malloc fails for perm_c[]." ); } perm_r = intMalloc ( m ); if ( !perm_r ) { ABORT ( "Malloc fails for perm_r[]." ); } /* Initialize the statistics variables. */ StatInit ( &stat ); /* Call SGSSV to factor the matrix and solve the linear system. */ sgssv ( &options, &A, perm_c, perm_r, &L, &U, &B, &stat, &info ); if ( info == 0 ) { /* To conveniently access the solution matrix, you need to get a pointer to it. */ sol = (float*) ((DNformat*) B.Store)->nzval; /* Compute the infinity norm of the error. */ sinf_norm_error ( nrhs, &B, xact ); Lstore = (SCformat *) L.Store; Ustore = (NCformat *) U.Store; printf ( "\n" ); printf ( " Number of nonzeros in factor L = %d\n", Lstore->nnz ); printf ( " Number of nonzeros in factor U = %d\n", Ustore->nnz ); printf ( " Number of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n ); sQuerySpace ( &L, &U, &mem_usage ); printf ( "\n" ); printf ( " L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions); } else { printf ( "\n" ); printf ( " SGSSV error returns INFO= %d\n", info ); if ( info <= n ) { sQuerySpace ( &L, &U, &mem_usage ); printf ( "\n" ); printf (" L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, mem_usage.expansions ); } } if ( options.PrintStat ) { StatPrint ( &stat ); } StatFree ( &stat ); /* Free the memory. */ SUPERLU_FREE ( rhs ); SUPERLU_FREE ( xact ); SUPERLU_FREE ( perm_r ); SUPERLU_FREE ( perm_c ); Destroy_CompCol_Matrix ( &A ); Destroy_SuperMatrix_Store ( &B ); Destroy_SuperNode_Matrix ( &L ); Destroy_CompCol_Matrix ( &U ); /* Say goodbye. */ printf ( "\n" ); printf ( "SUPER_LU_S2:\n" ); printf ( " Normal end of execution.\n"); return 0; }